Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Example] Update runtime examples (#1542)

* Add notes for tensors
* Optimize some apis
* Move some warnings
@@ -9,6 +9,6 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
 # Add the FastDeploy dependency headers
 include_directories(${FASTDEPLOY_INCS})

-add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_onnx_openvino.cc)
+add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_paddle_paddle_inference.cc)
 # Add the FastDeploy library dependencies
 target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS})
@@ -13,52 +13,60 @@
 // limitations under the License.

 #include "fastdeploy/runtime.h"
+#include <cassert>

 namespace fd = fastdeploy;

 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2.onnx";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx
+  std::string model_file = "pplcnet.onnx";

-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
   runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
   runtime_option.UseOrtBackend();

   // Use CPU to inference
   runtime_option.UseCpu();
   runtime_option.SetCpuThreadNum(12);

-  // **** GPU ****
-  // To use GPU, use the following commented code
+  // Use Gpu to inference
   // runtime_option.UseGpu(0);
+  // If need to configure ONNX Runtime backend for more option, we can configure runtime_option.ort_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OrtBackendOption.html

-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
-
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
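Editor's note on the assert-based checks the updated examples adopt (a sketch, not part of the diff): assert(expr) is removed entirely when the file is compiled with -DNDEBUG, so in a release build runtime.Init(...) wrapped in an assert would never even execute. A release-safe variant, reusing the variables from the example above:

    // Release-safe alternative to assert(runtime.Init(...)):
    // the call always runs, and failure is reported in every build mode.
    fd::Runtime runtime;
    if (!runtime.Init(runtime_option)) {
      std::cerr << "Failed to init FastDeploy runtime for model: "
                << model_file << std::endl;
      return -1;
    }
    // ... build inputs/outputs as in the example above ...
    if (!runtime.Infer(inputs, &outputs)) {
      std::cerr << "Failed to run inference." << std::endl;
      return -1;
    }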
@@ -13,47 +13,57 @@
 // limitations under the License.

 #include "fastdeploy/runtime.h"
+#include <cassert>

 namespace fd = fastdeploy;

 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2.onnx";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx
+  std::string model_file = "pplcnet.onnx";

-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
   runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
   runtime_option.UseOpenVINOBackend();
-  runtime_option.SetCpuThreadNum(12);
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
-
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  // Use CPU to inference
+  // If need to configure OpenVINO backend for more option, we can configure runtime_option.openvino_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OpenVINOBackendOption.html
+  runtime_option.UseCpu();
+  runtime_option.SetCpuThreadNum(12);
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
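The commit message mentions "Add notes for tensors": the old examples used FDTensor::SetExternalData while the new ones use SetData. A sketch contrasting the two, with the copy semantics stated as an assumption (the diff itself does not document them):

    // Assumption: SetData copies the buffer into tensor-owned memory, while
    // SetExternalData only records the pointer. If in doubt, keep the host
    // buffer alive until every Infer() that uses the tensor has returned.
    std::vector<float> host_buffer(1 * 3 * 224 * 224, 0.5f);

    fd::FDTensor owned;
    owned.SetData({1, 3, 224, 224}, fd::FDDataType::FP32, host_buffer.data());
    // Under the copy assumption, host_buffer may now be reused or freed.

    fd::FDTensor external;
    external.SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32,
                             host_buffer.data());
    // host_buffer must stay alive and unchanged until inference completes.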
@@ -13,48 +13,60 @@
 // limitations under the License.

 #include "fastdeploy/runtime.h"
+#include <cassert>

 namespace fd = fastdeploy;

 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2.onnx";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx
+  std::string model_file = "pplcnet.onnx";

-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
   runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
-  runtime_option.UseGpu(0);
   runtime_option.UseTrtBackend();
   runtime_option.SetTrtInputShape("inputs", {1, 3, 224, 224});
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
-
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  // Use NVIDIA GPU to inference
+  // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html
+  runtime_option.UseGpu(0);
+  // Use float16 inference to improve performance
+  runtime_option.trt_option.enable_fp16 = true;
+  // Cache trt engine to reduce time cost in model initialize
+  runtime_option.trt_option.serialize_file = "./model.trt";
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
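A sketch collecting the TensorRT knobs this example touches in one place. The min/opt/max overload of SetTrtInputShape is an assumption inferred from the min_shape/max_shape maps of TrtBackendOption shown further down; only the single-shape call appears in the diff:

    fd::RuntimeOption opt;
    opt.SetModelPath("pplcnet.onnx", "", fd::ModelFormat::ONNX);
    opt.UseGpu(0);
    opt.UseTrtBackend();
    // Fix a shape range up front so TensorRT need not rebuild for new shapes
    // (assumed overload: name, min, opt, max).
    opt.SetTrtInputShape("inputs", {1, 3, 224, 224},
                         {1, 3, 224, 224},
                         {4, 3, 224, 224});
    opt.trt_option.enable_fp16 = true;              // falls back to fp32 if unsupported
    opt.trt_option.serialize_file = "./model.trt";  // cache engine across runs
    // Caveat (general TensorRT caching behavior, assumed to apply here):
    // delete the cache file after changing shapes or precision, otherwise a
    // stale engine is loaded.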
@@ -13,53 +13,61 @@
 // limitations under the License.

 #include "fastdeploy/runtime.h"
+#include <cassert>

 namespace fd = fastdeploy;

 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2/inference.pdmodel";
-  std::string params_file = "mobilenetv2/inference.pdiparams";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
+  std::string model_file = "pplcnet/inference.pdmodel";
+  std::string params_file = "pplcnet/inference.pdiparams";

-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
-  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
+  runtime_option.SetModelPath(model_file, params_file);
   runtime_option.UseOrtBackend();

   // Use CPU to inference
   runtime_option.UseCpu();
   runtime_option.SetCpuThreadNum(12);

-  // **** GPU ****
-  // To use GPU, use the following commented code
+  // Use Gpu to inference
   // runtime_option.UseGpu(0);
+  // If need to configure ONNX Runtime backend for more option, we can configure runtime_option.ort_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OrtBackendOption.html

-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
-
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
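The examples read outputs through the raw pointer returned by GetData(). A small sketch of the safer pattern, using only GetData() and Numel() as above, if the values are needed after the next Infer() call:

    // Copy the first output into owned storage; the tensor's internal buffer
    // may be reused or reallocated by subsequent Infer() calls.
    const float* raw = reinterpret_cast<const float*>(outputs[0].GetData());
    std::vector<float> scores(raw, raw + outputs[0].Numel());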
@@ -13,48 +13,58 @@
 // limitations under the License.

 #include "fastdeploy/runtime.h"
+#include <cassert>

 namespace fd = fastdeploy;

 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2/inference.pdmodel";
-  std::string params_file = "mobilenetv2/inference.pdiparams";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
+  std::string model_file = "pplcnet/inference.pdmodel";
+  std::string params_file = "pplcnet/inference.pdiparams";

-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
-  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
+  runtime_option.SetModelPath(model_file, params_file);
   runtime_option.UseOpenVINOBackend();
-  runtime_option.SetCpuThreadNum(12);
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
-
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  // Use CPU to inference
+  // If need to configure OpenVINO backend for more option, we can configure runtime_option.openvino_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OpenVINOBackendOption.html
+  runtime_option.UseCpu();
+  runtime_option.SetCpuThreadNum(12);
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
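The examples only initialize inputs[0], which works for these single-input classifiers. A sketch of the generic form for multi-input models, using only GetInputInfos(), TensorInfo::name, SetData, and FDTensor::name as in the diff; the fixed shape, FP32 dtype, and 0.5 fill are assumptions for this demo model:

    std::vector<fd::TensorInfo> infos = runtime.GetInputInfos();
    std::vector<fd::FDTensor> all_inputs(infos.size());
    std::vector<std::vector<float>> buffers(infos.size());
    for (size_t i = 0; i < infos.size(); ++i) {
      // Demo shape; infos[i].shape may contain -1 for dynamic dimensions,
      // which must be resolved before allocating a buffer.
      buffers[i].assign(1 * 3 * 224 * 224, 0.5f);
      all_inputs[i].SetData({1, 3, 224, 224}, fd::FDDataType::FP32,
                            buffers[i].data());
      all_inputs[i].name = infos[i].name;
    }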
@@ -13,53 +13,57 @@
 // limitations under the License.

 #include "fastdeploy/runtime.h"
+#include <cassert>

 namespace fd = fastdeploy;

 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2/inference.pdmodel";
-  std::string params_file = "mobilenetv2/inference.pdiparams";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
+  std::string model_file = "pplcnet/inference.pdmodel";
+  std::string params_file = "pplcnet/inference.pdiparams";

-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
-  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
-  // CPU
+  runtime_option.SetModelPath(model_file, params_file);
   runtime_option.UsePaddleInferBackend();
   runtime_option.SetCpuThreadNum(12);
-  // GPU
-  // runtime_option.UseGpu(0);
-  // IPU
-  // runtime_option.UseIpu();
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
-
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  runtime_option.UseCpu();
+
+  // If need to configure Paddle Inference backend for more option, we can configure runtime_option.paddle_infer_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1PaddleBackendOption.html
+  runtime_option.paddle_infer_option.enable_mkldnn = true;
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
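A condensed sketch of the Paddle Inference CPU configuration this example arrives at; both flags shown here also appear in PaddleBackendOption further down in this diff:

    fd::RuntimeOption opt;
    opt.SetModelPath("pplcnet/inference.pdmodel",
                     "pplcnet/inference.pdiparams");
    opt.UsePaddleInferBackend();
    opt.UseCpu();
    opt.SetCpuThreadNum(12);
    opt.paddle_infer_option.enable_mkldnn = true;  // oneDNN acceleration on x86 CPUs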
examples/runtime/cpp/infer_paddle_paddle_inference_trt.cc (new file, 77 lines)
@@ -0,0 +1,77 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/runtime.h"
+#include <cassert>
+
+namespace fd = fastdeploy;
+
+int main(int argc, char* argv[]) {
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
+  std::string model_file = "pplcnet/inference.pdmodel";
+  std::string params_file = "pplcnet/inference.pdiparams";
+
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
+  fd::RuntimeOption runtime_option;
+  runtime_option.SetModelPath(model_file, params_file);
+  runtime_option.UsePaddleInferBackend();
+  runtime_option.UseGpu(0);
+
+  // Enable Paddle Inference + TensorRT
+  // If need to configure Paddle Inference backend for more option, we can configure runtime_option.paddle_infer_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1PaddleBackendOption.html
+  runtime_option.paddle_infer_option.enable_trt = true;
+
+  // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html
+  // Use float16 inference to improve performance
+  runtime_option.trt_option.enable_fp16 = true;
+  // Cache trt engine to reduce time cost in model initialize
+  runtime_option.trt_option.serialize_file = "./pplcnet_model.trt";
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
+  return 0;
+}
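With a fresh serialize_file, the very first Infer() pays the TensorRT engine-build cost and later runs load the cached engine (per the serialize_file comment in TrtBackendOption below). A sketch making that visible with <chrono>, reusing runtime, inputs, and outputs from the new file above:

    #include <chrono>  // add at the top of the file

    auto t0 = std::chrono::steady_clock::now();
    assert(runtime.Infer(inputs, &outputs));  // slow on the first run only
    auto t1 = std::chrono::steady_clock::now();
    std::cout << "infer ms: "
              << std::chrono::duration_cast<std::chrono::milliseconds>(t1 - t0)
                     .count()
              << std::endl;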
@@ -13,49 +13,61 @@
 // limitations under the License.

 #include "fastdeploy/runtime.h"
+#include <cassert>

 namespace fd = fastdeploy;

 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2/inference.pdmodel";
-  std::string params_file = "mobilenetv2/inference.pdiparams";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
+  std::string model_file = "pplcnet/inference.pdmodel";
+  std::string params_file = "pplcnet/inference.pdiparams";

-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
-  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
-  runtime_option.UseGpu(0);
+  runtime_option.SetModelPath(model_file, params_file);
   runtime_option.UseTrtBackend();
   runtime_option.EnablePaddleToTrt();
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
-
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  // Use NVIDIA GPU to inference
+  // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html
+  runtime_option.UseGpu(0);
+  // Use float16 inference to improve performance
+  runtime_option.trt_option.enable_fp16 = true;
+  // Cache trt engine to reduce time cost in model initialize
+  runtime_option.trt_option.serialize_file = "./pplcnet_model.trt";
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
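This file still uses the UseTrtBackend() + EnablePaddleToTrt() pair, while the new example file added above expresses the same Paddle-with-TensorRT setup through a flag. A sketch of that newer style, using only calls that appear in this diff:

    fd::RuntimeOption opt;
    opt.SetModelPath("pplcnet/inference.pdmodel",
                     "pplcnet/inference.pdiparams");
    opt.UsePaddleInferBackend();
    opt.UseGpu(0);
    opt.paddle_infer_option.enable_trt = true;        // Paddle Inference + TensorRT
    opt.trt_option.enable_fp16 = true;
    opt.trt_option.serialize_file = "./pplcnet_model.trt";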
@@ -85,10 +85,10 @@ class BaseBackend {
                            bool copy_to_fd = true) = 0;
   // Optional: For those backends which can share memory
   // while creating multiple inference engines with same model file
-  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
-                                             void *stream = nullptr,
+  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption& runtime_option,
+                                             void* stream = nullptr,
                                              int device_id = -1) {
-    FDERROR << "Clone no support" << std::endl;
+    FDERROR << "Clone no support " << runtime_option.backend << " " << stream << " " << device_id << std::endl;
     return nullptr;
   }
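For context on this contract, a sketch of how a backend might override Clone: share the already-loaded weights, but bind the clone to its own stream and device. MyBackend and its members are illustrative, not FastDeploy API; only the Clone signature comes from the diff:

    // Hypothetical backend sharing model memory across engines.
    std::unique_ptr<BaseBackend> MyBackend::Clone(RuntimeOption& runtime_option,
                                                  void* stream, int device_id) {
      auto cloned = std::make_unique<MyBackend>();     // needs <memory>
      cloned->shared_weights_ = shared_weights_;       // hypothetical shared handle
      cloned->stream_ = stream;                        // caller-provided stream
      // Fall back to the option's device when none is given (assumed field).
      cloned->device_id_ = device_id >= 0 ? device_id : runtime_option.device_id;
      return cloned;
    }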
@@ -53,32 +53,46 @@ enum LitePowerMode {
 struct LiteBackendOption {
   /// Paddle Lite power mode for mobile device.
   int power_mode = 3;
-  /// Number of threads while use CPU
+  // Number of threads while use CPU
   int cpu_threads = 1;
   /// Enable use half precision
   bool enable_fp16 = false;
-  /// Inference device, Paddle Lite support CPU/KUNLUNXIN/TIMVX/ASCEND
+  // Inference device, Paddle Lite support CPU/KUNLUNXIN/TIMVX/ASCEND
   Device device = Device::CPU;
-  /// Index of inference device
+  // Index of inference device
   int device_id = 0;

+  /// kunlunxin_l3_workspace_size
   int kunlunxin_l3_workspace_size = 0xfffc00;
+  /// kunlunxin_locked
   bool kunlunxin_locked = false;
+  /// kunlunxin_autotune
   bool kunlunxin_autotune = true;
+  /// kunlunxin_autotune_file
   std::string kunlunxin_autotune_file = "";
+  /// kunlunxin_precision
   std::string kunlunxin_precision = "int16";
+  /// kunlunxin_adaptive_seqlen
   bool kunlunxin_adaptive_seqlen = false;
+  /// kunlunxin_enable_multi_stream
   bool kunlunxin_enable_multi_stream = false;

   /// Optimized model dir for CxxConfig
   std::string optimized_model_dir = "";
+  /// nnadapter_subgraph_partition_config_path
   std::string nnadapter_subgraph_partition_config_path = "";
+  /// nnadapter_subgraph_partition_config_buffer
   std::string nnadapter_subgraph_partition_config_buffer = "";
+  /// nnadapter_context_properties
   std::string nnadapter_context_properties = "";
+  /// nnadapter_model_cache_dir
   std::string nnadapter_model_cache_dir = "";
+  /// nnadapter_mixed_precision_quantization_config_path
   std::string nnadapter_mixed_precision_quantization_config_path = "";
+  /// nnadapter_dynamic_shape_info
   std::map<std::string, std::vector<std::vector<int64_t>>>
       nnadapter_dynamic_shape_info = {{"", {{0}}}};
+  /// nnadapter_device_names
   std::vector<std::string> nnadapter_device_names = {};
 };
 }  // namespace fastdeploy
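The nnadapter_dynamic_shape_info type (a map from tensor name to a list of candidate shapes) is easiest to see filled in. A sketch with made-up tensor and device names, using the fd alias from the examples above:

    fd::LiteBackendOption lite_opt;
    // One entry per input tensor; each inner vector is one candidate shape.
    lite_opt.nnadapter_dynamic_shape_info = {
        {"input_ids", {{1, 64}, {1, 128}, {1, 256}}},  // demo tensor name
    };
    lite_opt.nnadapter_device_names = {"demo_npu"};    // demo device name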
@@ -25,27 +25,18 @@ namespace fastdeploy {
 /*! @brief Option object to configure ONNX Runtime backend
  */
 struct OrtBackendOption {
-  /*
-   * @brief Level of graph optimization, -1: mean default(Enable all the optimization strategy)/0: disable all the optimization strategy/1: enable basic strategy/2:enable extend strategy/99: enable all
-   */
+  /// Level of graph optimization, -1: mean default(Enable all the optimization strategy)/0: disable all the optimization strategy/1: enable basic strategy/2:enable extend strategy/99: enable all
   int graph_optimization_level = -1;
-  /*
-   * @brief Number of threads to execute the operator, -1: default
-   */
+  /// Number of threads to execute the operator, -1: default
   int intra_op_num_threads = -1;
-  /*
-   * @brief Number of threads to execute the graph, -1: default. This parameter only will bring effects while the `OrtBackendOption::execution_mode` set to 1.
-   */
+  /// Number of threads to execute the graph, -1: default. This parameter only will bring effects while the `OrtBackendOption::execution_mode` set to 1.
   int inter_op_num_threads = -1;
-  /*
-   * @brief Execution mode for the graph, -1: default(Sequential mode)/0: Sequential mode, execute the operators in graph one by one. /1: Parallel mode, execute the operators in graph parallelly.
-   */
+  /// Execution mode for the graph, -1: default(Sequential mode)/0: Sequential mode, execute the operators in graph one by one. /1: Parallel mode, execute the operators in graph parallelly.
   int execution_mode = -1;
-  /// Inference device, OrtBackend supports CPU/GPU
+  // Inference device, OrtBackend supports CPU/GPU
   Device device = Device::CPU;
-  /// Inference device id
+  // Inference device id
   int device_id = 0;

   void* external_stream_ = nullptr;
 };
 }  // namespace fastdeploy
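A sketch tying these fields back to the runtime_option.ort_option member mentioned in the example comments above; the values are demo choices, the field names and semantics come from the struct just shown:

    fd::RuntimeOption opt;
    opt.UseOrtBackend();
    opt.ort_option.graph_optimization_level = 99;  // enable all graph rewrites
    opt.ort_option.intra_op_num_threads = 8;       // threads per operator
    opt.ort_option.execution_mode = 1;             // parallel graph execution
    opt.ort_option.inter_op_num_threads = 2;       // only effective in mode 1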
@@ -54,6 +54,8 @@ struct PaddleBackendOption {
   bool enable_mkldnn = true;
   /// Use Paddle Inference + TensorRT to inference model on GPU
   bool enable_trt = false;
+  /// Whether enable memory optimize, default true
+  bool enable_memory_optimize = true;

   /*
    * @brief IPU option, this will configure the IPU hardware, if inference model in IPU
@@ -41,6 +41,7 @@ void BindPaddleOption(pybind11::module& m) {
       .def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info)
       .def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn)
       .def_readwrite("enable_trt", &PaddleBackendOption::enable_trt)
+      .def_readwrite("enable_memory_optimize", &PaddleBackendOption::enable_memory_optimize)
       .def_readwrite("ipu_option", &PaddleBackendOption::ipu_option)
       .def_readwrite("collect_trt_shape",
                      &PaddleBackendOption::collect_trt_shape)
@@ -147,7 +147,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
   }
   config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
                          params_buffer.c_str(), params_buffer.size());
-  config_.EnableMemoryOptim();
+  if (option.enable_memory_optimize) {
+    config_.EnableMemoryOptim();
+  }
   BuildOption(option);

   // The input/output information get from predictor is not right, use
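With this change, memory optimization becomes an opt-out rather than unconditional. From user code the toggle would look like the following sketch (the flag is the one added to PaddleBackendOption above):

    fd::RuntimeOption opt;
    opt.UsePaddleInferBackend();
    // Keep intermediate buffers instead of reusing them; can help debugging.
    opt.paddle_infer_option.enable_memory_optimize = false;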
@@ -33,9 +33,8 @@ struct TrtBackendOption {
   /// Enable log while converting onnx model to tensorrt
   bool enable_log_info = false;

-  /*
-   * @brief Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
-   */
+  /// Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
   bool enable_fp16 = false;

   /** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend
@@ -64,9 +63,7 @@
       max_shape[tensor_name].assign(max.begin(), max.end());
     }
   }
-  /**
-   * @brief Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again
-   */
+  /// Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again
   std::string serialize_file = "";

   // The below parameters may be removed in next version, please do not
@@ -22,4 +22,4 @@ for root, dirs, files in os.walk(third_libs):
     for f in files:
         if f.strip().count(".so") > 0 or f.strip() == "plugins.xml":
             full_path = os.path.join(root, f)
-            shutil.copy(full_path, os.path.join(bc_dirname, "lib"))
+            shutil.copy(full_path, os.path.join(bc_dirname, "lib"), follow_symlinks=False)