Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)
[Other] Add onnx_ort_runtime cpp/python demos (#565)
* add onnx_ort_runtime demo
* remove pynvml, psutil and GPUtil from requirements
@@ -16,9 +16,6 @@ import fastdeploy as fd
 import cv2
 import os
 import numpy as np
-import pynvml
-import psutil
-import GPUtil
 import time


@@ -112,6 +109,8 @@ def build_option(args):


 def get_current_memory_mb(gpu_id=None):
+    import pynvml
+    import psutil
     pid = os.getpid()
     p = psutil.Process(pid)
     info = p.memory_full_info()
@@ -126,6 +125,7 @@ def get_current_memory_mb(gpu_id=None):


 def get_current_gputil(gpu_id):
+    import GPUtil
     GPUs = GPUtil.getGPUs()
     gpu_load = GPUs[gpu_id].load
     return gpu_load
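This hunk group (and the three similar ones that follow) moves the monitoring imports (pynvml, psutil, GPUtil) out of the module top level and into the helpers that actually use them, so merely importing the script no longer requires those packages. Below is a minimal sketch of what the two helpers look like after the change; the middle of get_current_memory_mb and its return values are elided by the diff, so the NVML section and the names cpu_mem_mb/gpu_mem_mb are assumptions, not the repository's code:

import os


def get_current_memory_mb(gpu_id=None):
    # Lazy imports: pynvml/psutil are only needed when memory stats are collected.
    import pynvml
    import psutil

    pid = os.getpid()
    p = psutil.Process(pid)
    info = p.memory_full_info()
    cpu_mem_mb = info.uss / 1024.0 / 1024.0

    gpu_mem_mb = 0
    if gpu_id is not None:
        # Assumption: the lines elided by the diff query device memory via NVML.
        pynvml.nvmlInit()
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        gpu_mem_mb = meminfo.used / 1024.0 / 1024.0
        pynvml.nvmlShutdown()
    return cpu_mem_mb, gpu_mem_mb


def get_current_gputil(gpu_id):
    # Lazy import, same idea: GPUtil is only required when GPU load is sampled.
    import GPUtil

    GPUs = GPUtil.getGPUs()
    gpu_load = GPUs[gpu_id].load
    return gpu_load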
@@ -16,11 +16,6 @@ import fastdeploy as fd
 import cv2
 import os
 import numpy as np
-import datetime
-import json
-import pynvml
-import psutil
-import GPUtil
 import time


@@ -114,6 +109,8 @@ def build_option(args):


 def get_current_memory_mb(gpu_id=None):
+    import pynvml
+    import psutil
     pid = os.getpid()
     p = psutil.Process(pid)
     info = p.memory_full_info()
@@ -128,6 +125,7 @@ def get_current_memory_mb(gpu_id=None):


 def get_current_gputil(gpu_id):
+    import GPUtil
     GPUs = GPUtil.getGPUs()
     gpu_load = GPUs[gpu_id].load
     return gpu_load
@@ -16,9 +16,6 @@ import fastdeploy as fd
 import cv2
 import os
 import numpy as np
-import pynvml
-import psutil
-import GPUtil
 import time


@@ -112,6 +109,8 @@ def build_option(args):


 def get_current_memory_mb(gpu_id=None):
+    import pynvml
+    import psutil
     pid = os.getpid()
     p = psutil.Process(pid)
     info = p.memory_full_info()
@@ -126,6 +125,7 @@ def get_current_memory_mb(gpu_id=None):


 def get_current_gputil(gpu_id):
+    import GPUtil
     GPUs = GPUtil.getGPUs()
     gpu_load = GPUs[gpu_id].load
     return gpu_load
@@ -16,11 +16,6 @@ import fastdeploy as fd
 import cv2
 import os
 import numpy as np
-import datetime
-import json
-import pynvml
-import psutil
-import GPUtil
 import time


@@ -114,6 +109,8 @@ def build_option(args):


 def get_current_memory_mb(gpu_id=None):
+    import pynvml
+    import psutil
     pid = os.getpid()
     p = psutil.Process(pid)
     info = p.memory_full_info()
@@ -128,6 +125,7 @@ def get_current_memory_mb(gpu_id=None):


 def get_current_gputil(gpu_id):
+    import GPUtil
     GPUs = GPUtil.getGPUs()
     gpu_load = GPUs[gpu_id].load
     return gpu_load
@@ -1,4 +1 @@
 numpy
-pynvml
-psutil
-GPUtil
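With pynvml, psutil and GPUtil dropped from requirements, they presumably have to be installed on demand (for example pip install pynvml psutil GPUtil) by users who run the scripts above with memory/GPU monitoring; plain inference no longer pulls them in.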
@@ -1,6 +1,6 @@
 # FastDeploy Runtime examples

-FastDeploy Runtime C++ inference examples are listed below.
+FastDeploy Runtime inference examples are listed below.

 ## Python examples

@@ -12,6 +12,7 @@ FastDeploy Runtime C++ inference examples are listed below.
 | python/infer_paddle_onnxruntime.py | Python | Deploy Paddle model with ONNX Runtime(CPU/GPU) |
 | python/infer_onnx_openvino.py | Python | Deploy ONNX model with OpenVINO(CPU) |
 | python/infer_onnx_tensorrt.py | Python | Deploy ONNX model with TensorRT(GPU) |
+| python/infer_onnx_onnxruntime.py | Python | Deploy ONNX model with ONNX Runtime(CPU/GPU) |

 ## C++ examples

@@ -23,6 +24,7 @@ FastDeploy Runtime C++ inference examples are listed below.
 | cpp/infer_paddle_onnxruntime.cc | C++ | Deploy Paddle model with ONNX Runtime(CPU/GPU) |
 | cpp/infer_onnx_openvino.cc | C++ | Deploy ONNX model with OpenVINO(CPU) |
 | cpp/infer_onnx_tensorrt.cc | C++ | Deploy ONNX model with TensorRT(GPU) |
+| cpp/infer_onnx_onnxruntime.cc | C++ | Deploy ONNX model with ONNX Runtime(CPU/GPU) |

 ## Detailed deployment documentation

examples/runtime/cpp/infer_onnx_onnxruntime.cc (new file, 64 lines)
@@ -0,0 +1,64 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/runtime.h"
+
+namespace fd = fastdeploy;
+
+int main(int argc, char* argv[]) {
+  std::string model_file = "mobilenetv2.onnx";
+
+  // setup option
+  fd::RuntimeOption runtime_option;
+  runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
+  runtime_option.UseOrtBackend();
+  runtime_option.SetCpuThreadNum(12);
+
+  // **** GPU ****
+  // To use GPU, uncomment the following line
+  // runtime_option.UseGpu(0);
+
+  // init runtime
+  std::unique_ptr<fd::Runtime> runtime =
+      std::unique_ptr<fd::Runtime>(new fd::Runtime());
+  if (!runtime->Init(runtime_option)) {
+    std::cerr << "--- Init FastDeploy Runtime Failed! "
+              << "\n--- Model: " << model_file << std::endl;
+    return -1;
+  } else {
+    std::cout << "--- Init FastDeploy Runtime Done! "
+              << "\n--- Model: " << model_file << std::endl;
+  }
+  // init input tensor shape
+  fd::TensorInfo info = runtime->GetInputInfo(0);
+  info.shape = {1, 3, 224, 224};
+
+  std::vector<fd::FDTensor> input_tensors(1);
+  std::vector<fd::FDTensor> output_tensors(1);
+
+  std::vector<float> inputs_data;
+  inputs_data.resize(1 * 3 * 224 * 224);
+  for (size_t i = 0; i < inputs_data.size(); ++i) {
+    inputs_data[i] = std::rand() % 1000 / 1000.0f;
+  }
+  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32,
+                                   inputs_data.data());
+
+  // get input name
+  input_tensors[0].name = info.name;
+
+  runtime->Infer(input_tensors, &output_tensors);
+
+  output_tensors[0].PrintInfo();
+  return 0;
+}
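A note on the new C++ demo: SetExternalData appears to make the input FDTensor reference the caller-owned inputs_data buffer rather than copy it, so the vector must stay alive until Infer returns; that holds here because both live in main's scope.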
@@ -25,6 +25,11 @@ int main(int argc, char* argv[]) {
   runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
   runtime_option.UseOrtBackend();
   runtime_option.SetCpuThreadNum(12);
+
+  // **** GPU ****
+  // To use GPU, uncomment the following line
+  // runtime_option.UseGpu(0);
+
   // init runtime
   std::unique_ptr<fd::Runtime> runtime =
       std::unique_ptr<fd::Runtime>(new fd::Runtime());
examples/runtime/python/infer_onnx_onnxruntime.py (new file, 47 lines)
@@ -0,0 +1,47 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import fastdeploy as fd
+from fastdeploy import ModelFormat
+import numpy as np
+
+# Download the model and extract it
+model_url = "https://bj.bcebos.com/fastdeploy/models/mobilenetv2.onnx"
+fd.download(model_url, path=".")
+
+option = fd.RuntimeOption()
+
+option.set_model_path("mobilenetv2.onnx", model_format=ModelFormat.ONNX)
+
+# **** CPU configuration ****
+option.use_cpu()
+option.use_ort_backend()
+option.set_cpu_thread_num(12)
+
+# **** GPU configuration ****
+# To use GPU, uncomment the following line
+# option.use_gpu(0)
+
+# Initialize the runtime
+runtime = fd.Runtime(option)
+
+# Get the model's input name
+input_name = runtime.get_input_info(0).name
+
+# Run inference with random data
+results = runtime.infer({
+    input_name: np.random.rand(1, 3, 224, 224).astype("float32")
+})
+
+print(results[0].shape)
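In the Python demo, runtime.infer takes a dict mapping input names to numpy arrays and returns the outputs as a list, so results[0].shape prints the shape of the only output; for mobilenetv2 that should be the classifier output, presumably (1, 1000).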