From 6be2c0367b91c6228c9ca5dec77c3c9138fb032e Mon Sep 17 00:00:00 2001 From: Jason Date: Wed, 8 Mar 2023 16:56:04 +0800 Subject: [PATCH] [Example] Update runtime examples (#1542) * Add notes for tensors * Optimize some apis * move some warnings --- examples/runtime/cpp/CMakeLists.txt | 2 +- .../runtime/cpp/infer_onnx_onnxruntime.cc | 78 +++++++++-------- examples/runtime/cpp/infer_onnx_openvino.cc | 74 +++++++++------- examples/runtime/cpp/infer_onnx_tensorrt.cc | 78 ++++++++++------- .../runtime/cpp/infer_paddle_onnxruntime.cc | 82 ++++++++++-------- examples/runtime/cpp/infer_paddle_openvino.cc | 78 +++++++++-------- .../cpp/infer_paddle_paddle_inference.cc | 86 ++++++++++--------- .../cpp/infer_paddle_paddle_inference_trt.cc | 77 +++++++++++++++++ examples/runtime/cpp/infer_paddle_tensorrt.cc | 82 ++++++++++-------- fastdeploy/runtime/backends/backend.h | 6 +- fastdeploy/runtime/backends/lite/option.h | 20 ++++- fastdeploy/runtime/backends/ort/option.h | 21 ++--- fastdeploy/runtime/backends/paddle/option.h | 2 + .../runtime/backends/paddle/option_pybind.cc | 1 + .../runtime/backends/paddle/paddle_backend.cc | 4 +- fastdeploy/runtime/backends/tensorrt/option.h | 9 +- scripts/build_bcloud_lib.py | 2 +- 17 files changed, 425 insertions(+), 277 deletions(-) create mode 100644 examples/runtime/cpp/infer_paddle_paddle_inference_trt.cc diff --git a/examples/runtime/cpp/CMakeLists.txt b/examples/runtime/cpp/CMakeLists.txt index 09ea45c3b..b90580bba 100644 --- a/examples/runtime/cpp/CMakeLists.txt +++ b/examples/runtime/cpp/CMakeLists.txt @@ -9,6 +9,6 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) # 添加FastDeploy依赖头文件 include_directories(${FASTDEPLOY_INCS}) -add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_onnx_openvino.cc) +add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_paddle_paddle_inference.cc) # 添加FastDeploy库依赖 target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/runtime/cpp/infer_onnx_onnxruntime.cc b/examples/runtime/cpp/infer_onnx_onnxruntime.cc index 4c27c1f65..48d7957f3 100644 --- a/examples/runtime/cpp/infer_onnx_onnxruntime.cc +++ b/examples/runtime/cpp/infer_onnx_onnxruntime.cc @@ -13,52 +13,60 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2.onnx"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx + std::string model_file = "pplcnet.onnx"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX); runtime_option.UseOrtBackend(); + + // Use CPU to inference + runtime_option.UseCpu(); runtime_option.SetCpuThreadNum(12); - // **** GPU **** - // To use GPU, use the following commented code + // Use Gpu to inference // runtime_option.UseGpu(0); + // If need to configure ONNX Runtime backend for more option, we can configure runtime_option.ort_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OrtBackendOption.html - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! 
" - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! " - << "\n--- Model: " << model_file << std::endl; + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); + + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - - //get input name - input_tensors[0].name = info.name; - - runtime->Infer(input_tensors, &output_tensors); - - output_tensors[0].PrintInfo(); + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git a/examples/runtime/cpp/infer_onnx_openvino.cc b/examples/runtime/cpp/infer_onnx_openvino.cc index c2f270be9..e49d4d0ac 100644 --- a/examples/runtime/cpp/infer_onnx_openvino.cc +++ b/examples/runtime/cpp/infer_onnx_openvino.cc @@ -13,47 +13,57 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2.onnx"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx + std::string model_file = "pplcnet.onnx"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX); runtime_option.UseOpenVINOBackend(); - runtime_option.SetCpuThreadNum(12); - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! 
" - << "\n--- Model: " << model_file << std::endl; - } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - //get input name - input_tensors[0].name = info.name; + // Use CPU to inference + // If need to configure OpenVINO backend for more option, we can configure runtime_option.openvino_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OpenVINOBackendOption.html + runtime_option.UseCpu(); + runtime_option.SetCpuThreadNum(12); - runtime->Infer(input_tensors, &output_tensors); + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); - output_tensors[0].PrintInfo(); + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; + } + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git a/examples/runtime/cpp/infer_onnx_tensorrt.cc b/examples/runtime/cpp/infer_onnx_tensorrt.cc index 084c1dfae..3b74b03c8 100644 --- a/examples/runtime/cpp/infer_onnx_tensorrt.cc +++ b/examples/runtime/cpp/infer_onnx_tensorrt.cc @@ -13,48 +13,60 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2.onnx"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx + std::string model_file = "pplcnet.onnx"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX); - runtime_option.UseGpu(0); runtime_option.UseTrtBackend(); - runtime_option.SetTrtInputShape("inputs", {1, 3, 224, 224}); - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! 
" - << "\n--- Model: " << model_file << std::endl; - } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - //get input name - input_tensors[0].name = info.name; + // Use NVIDIA GPU to inference + // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html + runtime_option.UseGpu(0); + // Use float16 inference to improve performance + runtime_option.trt_option.enable_fp16 = true; + // Cache trt engine to reduce time cost in model initialize + runtime_option.trt_option.serialize_file = "./model.trt"; - runtime->Infer(input_tensors, &output_tensors); + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); - output_tensors[0].PrintInfo(); + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; + } + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git a/examples/runtime/cpp/infer_paddle_onnxruntime.cc b/examples/runtime/cpp/infer_paddle_onnxruntime.cc index 612966d73..10a32201d 100644 --- a/examples/runtime/cpp/infer_paddle_onnxruntime.cc +++ b/examples/runtime/cpp/infer_paddle_onnxruntime.cc @@ -13,53 +13,61 @@ // limitations under the License. 
#include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2/inference.pdmodel"; - std::string params_file = "mobilenetv2/inference.pdiparams"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz + std::string model_file = "pplcnet/inference.pdmodel"; + std::string params_file = "pplcnet/inference.pdiparams"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; - runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.SetModelPath(model_file, params_file); runtime_option.UseOrtBackend(); + + // Use CPU to inference + runtime_option.UseCpu(); runtime_option.SetCpuThreadNum(12); - // **** GPU **** - // To use GPU, use the following commented code + // Use Gpu to inference // runtime_option.UseGpu(0); + // If need to configure ONNX Runtime backend for more option, we can configure runtime_option.ort_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OrtBackendOption.html - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! " - << "\n--- Model: " << model_file << std::endl; + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); + + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - - //get input name - input_tensors[0].name = info.name; - - runtime->Infer(input_tensors, &output_tensors); - - output_tensors[0].PrintInfo(); + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git a/examples/runtime/cpp/infer_paddle_openvino.cc b/examples/runtime/cpp/infer_paddle_openvino.cc index 3958cdcf0..b1b9b36ff 100644 --- a/examples/runtime/cpp/infer_paddle_openvino.cc +++ 
b/examples/runtime/cpp/infer_paddle_openvino.cc @@ -13,48 +13,58 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2/inference.pdmodel"; - std::string params_file = "mobilenetv2/inference.pdiparams"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz + std::string model_file = "pplcnet/inference.pdmodel"; + std::string params_file = "pplcnet/inference.pdiparams"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; - runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.SetModelPath(model_file, params_file); runtime_option.UseOpenVINOBackend(); - runtime_option.SetCpuThreadNum(12); - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! " - << "\n--- Model: " << model_file << std::endl; - } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - //get input name - input_tensors[0].name = info.name; + // Use CPU to inference + // If need to configure OpenVINO backend for more option, we can configure runtime_option.openvino_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OpenVINOBackendOption.html + runtime_option.UseCpu(); + runtime_option.SetCpuThreadNum(12); - runtime->Infer(input_tensors, &output_tensors); + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); - output_tensors[0].PrintInfo(); + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; + } + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git a/examples/runtime/cpp/infer_paddle_paddle_inference.cc b/examples/runtime/cpp/infer_paddle_paddle_inference.cc index 0df04fafa..454534f93 100644 --- 
a/examples/runtime/cpp/infer_paddle_paddle_inference.cc +++ b/examples/runtime/cpp/infer_paddle_paddle_inference.cc @@ -13,53 +13,57 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2/inference.pdmodel"; - std::string params_file = "mobilenetv2/inference.pdiparams"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz + std::string model_file = "pplcnet/inference.pdmodel"; + std::string params_file = "pplcnet/inference.pdiparams"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; - runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); - // CPU + runtime_option.SetModelPath(model_file, params_file); runtime_option.UsePaddleInferBackend(); - runtime_option.SetCpuThreadNum(12); - // GPU - // runtime_option.UseGpu(0); - // IPU - // runtime_option.UseIpu(); - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! " - << "\n--- Model: " << model_file << std::endl; + runtime_option.UseCpu(); + + // If need to configure Paddle Inference backend for more option, we can configure runtime_option.paddle_infer_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1PaddleBackendOption.html + runtime_option.paddle_infer_option.enable_mkldnn = true; + + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); + + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - - //get input name - input_tensors[0].name = info.name; - - runtime->Infer(input_tensors, &output_tensors); - - output_tensors[0].PrintInfo(); + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git 
a/examples/runtime/cpp/infer_paddle_paddle_inference_trt.cc b/examples/runtime/cpp/infer_paddle_paddle_inference_trt.cc new file mode 100644 index 000000000..d2dc2019d --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_paddle_inference_trt.cc @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" +#include + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz + std::string model_file = "pplcnet/inference.pdmodel"; + std::string params_file = "pplcnet/inference.pdiparams"; + + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file); + runtime_option.UsePaddleInferBackend(); + runtime_option.UseGpu(0); + + // Enable Paddle Inference + TensorRT + // If need to configure Paddle Inference backend for more option, we can configure runtime_option.paddle_infer_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1PaddleBackendOption.html + runtime_option.paddle_infer_option.enable_trt = true; + + // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html + // Use float16 inference to improve performance + runtime_option.trt_option.enable_fp16 = true; + // Cache trt engine to reduce time cost in model initialize + runtime_option.trt_option.serialize_file = "./pplcnet_model.trt"; + + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); + + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; + } + std::cout << std::endl; + return 0; +} diff --git a/examples/runtime/cpp/infer_paddle_tensorrt.cc b/examples/runtime/cpp/infer_paddle_tensorrt.cc index 
04fe311b2..5e133f428 100644 --- a/examples/runtime/cpp/infer_paddle_tensorrt.cc +++ b/examples/runtime/cpp/infer_paddle_tensorrt.cc @@ -13,49 +13,61 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2/inference.pdmodel"; - std::string params_file = "mobilenetv2/inference.pdiparams"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz + std::string model_file = "pplcnet/inference.pdmodel"; + std::string params_file = "pplcnet/inference.pdiparams"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; - runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); - runtime_option.UseGpu(0); + runtime_option.SetModelPath(model_file, params_file); runtime_option.UseTrtBackend(); - runtime_option.EnablePaddleToTrt(); - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! " - << "\n--- Model: " << model_file << std::endl; - } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - //get input name - input_tensors[0].name = info.name; + // Use NVIDIA GPU to inference + // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html + runtime_option.UseGpu(0); + // Use float16 inference to improve performance + runtime_option.trt_option.enable_fp16 = true; + // Cache trt engine to reduce time cost in model initialize + runtime_option.trt_option.serialize_file = "./pplcnet_model.trt"; - runtime->Infer(input_tensors, &output_tensors); + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); - output_tensors[0].PrintInfo(); + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + 
    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
-}
\ No newline at end of file
+}
diff --git a/fastdeploy/runtime/backends/backend.h b/fastdeploy/runtime/backends/backend.h
index 7566806e5..8b940cc2d 100644
--- a/fastdeploy/runtime/backends/backend.h
+++ b/fastdeploy/runtime/backends/backend.h
@@ -85,10 +85,10 @@ class BaseBackend {
                          bool copy_to_fd = true) = 0;
   // Optional: For those backends which can share memory
   // while creating multiple inference engines with same model file
-  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
-                                             void *stream = nullptr,
+  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption& runtime_option,
+                                             void* stream = nullptr,
                                              int device_id = -1) {
-    FDERROR << "Clone no support" << std::endl;
+    FDERROR << "Clone is not supported by backend " << runtime_option.backend << " (stream=" << stream << ", device_id=" << device_id << ")" << std::endl;
     return nullptr;
   }
diff --git a/fastdeploy/runtime/backends/lite/option.h b/fastdeploy/runtime/backends/lite/option.h
index 70781d80f..dd76bf7df 100755
--- a/fastdeploy/runtime/backends/lite/option.h
+++ b/fastdeploy/runtime/backends/lite/option.h
@@ -53,32 +53,46 @@ enum LitePowerMode {
 struct LiteBackendOption {
   /// Paddle Lite power mode for mobile device.
   int power_mode = 3;
-  /// Number of threads while use CPU
+  // Number of threads while using CPU
   int cpu_threads = 1;
   /// Enable use half precision
   bool enable_fp16 = false;
-  /// Inference device, Paddle Lite support CPU/KUNLUNXIN/TIMVX/ASCEND
+  // Inference device, Paddle Lite supports CPU/KUNLUNXIN/TIMVX/ASCEND
   Device device = Device::CPU;
-  /// Index of inference device
+  // Index of inference device
   int device_id = 0;
+  /// kunlunxin_l3_workspace_size
   int kunlunxin_l3_workspace_size = 0xfffc00;
+  /// kunlunxin_locked
   bool kunlunxin_locked = false;
+  /// kunlunxin_autotune
   bool kunlunxin_autotune = true;
+  /// kunlunxin_autotune_file
   std::string kunlunxin_autotune_file = "";
+  /// kunlunxin_precision
   std::string kunlunxin_precision = "int16";
+  /// kunlunxin_adaptive_seqlen
   bool kunlunxin_adaptive_seqlen = false;
+  /// kunlunxin_enable_multi_stream
   bool kunlunxin_enable_multi_stream = false;
   /// Optimized model dir for CxxConfig
   std::string optimized_model_dir = "";
+  /// nnadapter_subgraph_partition_config_path
   std::string nnadapter_subgraph_partition_config_path = "";
+  /// nnadapter_subgraph_partition_config_buffer
   std::string nnadapter_subgraph_partition_config_buffer = "";
+  /// nnadapter_context_properties
   std::string nnadapter_context_properties = "";
+  /// nnadapter_model_cache_dir
   std::string nnadapter_model_cache_dir = "";
+  /// nnadapter_mixed_precision_quantization_config_path
   std::string nnadapter_mixed_precision_quantization_config_path = "";
+  /// nnadapter_dynamic_shape_info
   std::map<std::string, std::vector<std::vector<int64_t>>>
       nnadapter_dynamic_shape_info = {{"", {{0}}}};
+  /// nnadapter_device_names
   std::vector<std::string> nnadapter_device_names = {};
 };
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/ort/option.h b/fastdeploy/runtime/backends/ort/option.h
index 9487e5da9..1509fe0bb 100644
--- a/fastdeploy/runtime/backends/ort/option.h
+++ b/fastdeploy/runtime/backends/ort/option.h
@@ -25,27 +25,18 @@ namespace fastdeploy {
 /*! @brief Option object to configure ONNX Runtime backend
  */
 struct OrtBackendOption {
-  /*
-   * @brief Level of graph optimization, -1: mean default(Enable all the optimization strategy)/0: disable all the optimization strategy/1: enable basic strategy/2:enable extend strategy/99: enable all
-   */
+  /// Level of graph optimization, -1: default (enable all optimization strategies) / 0: disable all optimizations / 1: enable basic optimizations / 2: enable extended optimizations / 99: enable all
   int graph_optimization_level = -1;
-  /*
-   * @brief Number of threads to execute the operator, -1: default
-   */
+  /// Number of threads used to execute an operator, -1: default
   int intra_op_num_threads = -1;
-  /*
-   * @brief Number of threads to execute the graph, -1: default. This parameter only will bring effects while the `OrtBackendOption::execution_mode` set to 1.
-   */
+  /// Number of threads used to execute the graph, -1: default. This parameter only takes effect when `OrtBackendOption::execution_mode` is set to 1.
   int inter_op_num_threads = -1;
-  /*
-   * @brief Execution mode for the graph, -1: default(Sequential mode)/0: Sequential mode, execute the operators in graph one by one. /1: Parallel mode, execute the operators in graph parallelly.
-   */
+  /// Execution mode for the graph, -1: default (sequential mode) / 0: sequential mode, execute the operators in the graph one by one / 1: parallel mode, execute the operators in the graph in parallel
   int execution_mode = -1;
-  /// Inference device, OrtBackend supports CPU/GPU
+  // Inference device, OrtBackend supports CPU/GPU
   Device device = Device::CPU;
-  /// Inference device id
+  // Inference device id
   int device_id = 0;
-
   void* external_stream_ = nullptr;
 };
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/paddle/option.h b/fastdeploy/runtime/backends/paddle/option.h
index 6802b8da4..b05cc6f6f 100755
--- a/fastdeploy/runtime/backends/paddle/option.h
+++ b/fastdeploy/runtime/backends/paddle/option.h
@@ -54,6 +54,8 @@ struct PaddleBackendOption {
   bool enable_mkldnn = true;
   /// Use Paddle Inference + TensorRT to inference model on GPU
   bool enable_trt = false;
+  /// Whether to enable memory optimization, default true
+  bool enable_memory_optimize = true;
 
   /*
    * @brief IPU option, this will configure the IPU hardware, if inference model in IPU
diff --git a/fastdeploy/runtime/backends/paddle/option_pybind.cc b/fastdeploy/runtime/backends/paddle/option_pybind.cc
index 60b66e672..46e6a94b7 100644
--- a/fastdeploy/runtime/backends/paddle/option_pybind.cc
+++ b/fastdeploy/runtime/backends/paddle/option_pybind.cc
@@ -41,6 +41,7 @@ void BindPaddleOption(pybind11::module& m) {
       .def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info)
       .def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn)
       .def_readwrite("enable_trt", &PaddleBackendOption::enable_trt)
+      .def_readwrite("enable_memory_optimize", &PaddleBackendOption::enable_memory_optimize)
       .def_readwrite("ipu_option", &PaddleBackendOption::ipu_option)
       .def_readwrite("collect_trt_shape",
                      &PaddleBackendOption::collect_trt_shape)
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index bfe122f97..e817f272b 100644
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -147,7 +147,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
   }
   config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
                          params_buffer.c_str(), params_buffer.size());
-  config_.EnableMemoryOptim();
+  if (option.enable_memory_optimize) {
+    config_.EnableMemoryOptim();
+  }
   BuildOption(option);
 
   // The input/output information get from predictor is not right, use
diff --git a/fastdeploy/runtime/backends/tensorrt/option.h b/fastdeploy/runtime/backends/tensorrt/option.h
index ff28e3e3b..2825076b1 100755
--- a/fastdeploy/runtime/backends/tensorrt/option.h
+++ b/fastdeploy/runtime/backends/tensorrt/option.h
@@ -33,9 +33,8 @@ struct TrtBackendOption {
   /// Enable log while converting onnx model to tensorrt
   bool enable_log_info = false;
-  /*
-   * @brief Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
-   */
+
+  /// Enable half precision inference; on devices that do not support half precision, it falls back to float32 mode
   bool enable_fp16 = false;
 
   /** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend
@@ -64,9 +63,7 @@
       max_shape[tensor_name].assign(max.begin(), max.end());
     }
   }
-  /**
-   * @brief Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again
-   */
+  /// Set the cache file path when using the TensorRT backend. Loading a Paddle/ONNX model and initializing TensorRT take a long time; with this option the TensorRT engine is saved to `serialize_file` and loaded directly when the code runs again
   std::string serialize_file = "";
 
   // The below parameters may be removed in next version, please do not
diff --git a/scripts/build_bcloud_lib.py b/scripts/build_bcloud_lib.py
index aec0d21f5..7e11357e1 100644
--- a/scripts/build_bcloud_lib.py
+++ b/scripts/build_bcloud_lib.py
@@ -22,4 +22,4 @@ for root, dirs, files in os.walk(third_libs):
         for f in files:
             if f.strip().count(".so") > 0 or f.strip() == "plugins.xml":
                 full_path = os.path.join(root, f)
-                shutil.copy(full_path, os.path.join(bc_dirname, "lib"))
+                shutil.copy(full_path, os.path.join(bc_dirname, "lib"), follow_symlinks=False)
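
Note: none of the updated examples exercises the new paddle_infer_option.enable_memory_optimize flag added above. The snippet below is a minimal sketch of how the flag could be toggled; it is not part of the patch, assumes the FastDeploy headers from this change, mirrors the style of the examples in this change, and uses placeholder model paths.

// Minimal sketch (not part of the patch): toggling the new memory-optimize option.
#include "fastdeploy/runtime.h"
#include <cassert>

namespace fd = fastdeploy;

int main() {
  fd::RuntimeOption option;
  // Placeholder paths; any Paddle inference model works here.
  option.SetModelPath("model/inference.pdmodel", "model/inference.pdiparams");
  option.UsePaddleInferBackend();
  option.UseCpu();
  // Options touched by this patch: keep MKLDNN on, and switch off the
  // memory-optimization pass (default true) explicitly if desired.
  option.paddle_infer_option.enable_mkldnn = true;
  option.paddle_infer_option.enable_memory_optimize = false;

  fd::Runtime runtime;
  assert(runtime.Init(option));
  return 0;
}

From Python, the same flag should be reachable through the new pybind binding as runtime_option.paddle_infer_option.enable_memory_optimize.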