From 6be2c0367b91c6228c9ca5dec77c3c9138fb032e Mon Sep 17 00:00:00 2001 From: Jason Date: Wed, 8 Mar 2023 16:56:04 +0800 Subject: [PATCH] [Example] Update runtime examples (#1542) * Add notes for tensors * Optimize some apis * move some warnings --- examples/runtime/cpp/CMakeLists.txt | 2 +- .../runtime/cpp/infer_onnx_onnxruntime.cc | 78 +++++++++-------- examples/runtime/cpp/infer_onnx_openvino.cc | 74 +++++++++------- examples/runtime/cpp/infer_onnx_tensorrt.cc | 78 ++++++++++------- .../runtime/cpp/infer_paddle_onnxruntime.cc | 82 ++++++++++-------- examples/runtime/cpp/infer_paddle_openvino.cc | 78 +++++++++-------- .../cpp/infer_paddle_paddle_inference.cc | 86 ++++++++++--------- .../cpp/infer_paddle_paddle_inference_trt.cc | 77 +++++++++++++++++ examples/runtime/cpp/infer_paddle_tensorrt.cc | 82 ++++++++++-------- fastdeploy/runtime/backends/backend.h | 6 +- fastdeploy/runtime/backends/lite/option.h | 20 ++++- fastdeploy/runtime/backends/ort/option.h | 21 ++--- fastdeploy/runtime/backends/paddle/option.h | 2 + .../runtime/backends/paddle/option_pybind.cc | 1 + .../runtime/backends/paddle/paddle_backend.cc | 4 +- fastdeploy/runtime/backends/tensorrt/option.h | 9 +- scripts/build_bcloud_lib.py | 2 +- 17 files changed, 425 insertions(+), 277 deletions(-) create mode 100644 examples/runtime/cpp/infer_paddle_paddle_inference_trt.cc diff --git a/examples/runtime/cpp/CMakeLists.txt b/examples/runtime/cpp/CMakeLists.txt index 09ea45c3b..b90580bba 100644 --- a/examples/runtime/cpp/CMakeLists.txt +++ b/examples/runtime/cpp/CMakeLists.txt @@ -9,6 +9,6 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) # 添加FastDeploy依赖头文件 include_directories(${FASTDEPLOY_INCS}) -add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_onnx_openvino.cc) +add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_paddle_paddle_inference.cc) # 添加FastDeploy库依赖 target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/runtime/cpp/infer_onnx_onnxruntime.cc b/examples/runtime/cpp/infer_onnx_onnxruntime.cc index 4c27c1f65..48d7957f3 100644 --- a/examples/runtime/cpp/infer_onnx_onnxruntime.cc +++ b/examples/runtime/cpp/infer_onnx_onnxruntime.cc @@ -13,52 +13,60 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2.onnx"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx + std::string model_file = "pplcnet.onnx"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX); runtime_option.UseOrtBackend(); + + // Use CPU to inference + runtime_option.UseCpu(); runtime_option.SetCpuThreadNum(12); - // **** GPU **** - // To use GPU, use the following commented code + // Use Gpu to inference // runtime_option.UseGpu(0); + // If need to configure ONNX Runtime backend for more option, we can configure runtime_option.ort_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OrtBackendOption.html - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! 
" - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! " - << "\n--- Model: " << model_file << std::endl; + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); + + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - - //get input name - input_tensors[0].name = info.name; - - runtime->Infer(input_tensors, &output_tensors); - - output_tensors[0].PrintInfo(); + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git a/examples/runtime/cpp/infer_onnx_openvino.cc b/examples/runtime/cpp/infer_onnx_openvino.cc index c2f270be9..e49d4d0ac 100644 --- a/examples/runtime/cpp/infer_onnx_openvino.cc +++ b/examples/runtime/cpp/infer_onnx_openvino.cc @@ -13,47 +13,57 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2.onnx"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx + std::string model_file = "pplcnet.onnx"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX); runtime_option.UseOpenVINOBackend(); - runtime_option.SetCpuThreadNum(12); - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! 
" - << "\n--- Model: " << model_file << std::endl; - } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - //get input name - input_tensors[0].name = info.name; + // Use CPU to inference + // If need to configure OpenVINO backend for more option, we can configure runtime_option.openvino_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OpenVINOBackendOption.html + runtime_option.UseCpu(); + runtime_option.SetCpuThreadNum(12); - runtime->Infer(input_tensors, &output_tensors); + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); - output_tensors[0].PrintInfo(); + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; + } + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git a/examples/runtime/cpp/infer_onnx_tensorrt.cc b/examples/runtime/cpp/infer_onnx_tensorrt.cc index 084c1dfae..3b74b03c8 100644 --- a/examples/runtime/cpp/infer_onnx_tensorrt.cc +++ b/examples/runtime/cpp/infer_onnx_tensorrt.cc @@ -13,48 +13,60 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2.onnx"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx + std::string model_file = "pplcnet.onnx"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX); - runtime_option.UseGpu(0); runtime_option.UseTrtBackend(); - runtime_option.SetTrtInputShape("inputs", {1, 3, 224, 224}); - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! 
" - << "\n--- Model: " << model_file << std::endl; - } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - //get input name - input_tensors[0].name = info.name; + // Use NVIDIA GPU to inference + // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html + runtime_option.UseGpu(0); + // Use float16 inference to improve performance + runtime_option.trt_option.enable_fp16 = true; + // Cache trt engine to reduce time cost in model initialize + runtime_option.trt_option.serialize_file = "./model.trt"; - runtime->Infer(input_tensors, &output_tensors); + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); - output_tensors[0].PrintInfo(); + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; + } + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git a/examples/runtime/cpp/infer_paddle_onnxruntime.cc b/examples/runtime/cpp/infer_paddle_onnxruntime.cc index 612966d73..10a32201d 100644 --- a/examples/runtime/cpp/infer_paddle_onnxruntime.cc +++ b/examples/runtime/cpp/infer_paddle_onnxruntime.cc @@ -13,53 +13,61 @@ // limitations under the License. 
#include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2/inference.pdmodel"; - std::string params_file = "mobilenetv2/inference.pdiparams"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz + std::string model_file = "pplcnet/inference.pdmodel"; + std::string params_file = "pplcnet/inference.pdiparams"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; - runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.SetModelPath(model_file, params_file); runtime_option.UseOrtBackend(); + + // Use CPU to inference + runtime_option.UseCpu(); runtime_option.SetCpuThreadNum(12); - // **** GPU **** - // To use GPU, use the following commented code + // Use Gpu to inference // runtime_option.UseGpu(0); + // If need to configure ONNX Runtime backend for more option, we can configure runtime_option.ort_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OrtBackendOption.html - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! " - << "\n--- Model: " << model_file << std::endl; + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); + + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - - //get input name - input_tensors[0].name = info.name; - - runtime->Infer(input_tensors, &output_tensors); - - output_tensors[0].PrintInfo(); + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git a/examples/runtime/cpp/infer_paddle_openvino.cc b/examples/runtime/cpp/infer_paddle_openvino.cc index 3958cdcf0..b1b9b36ff 100644 --- a/examples/runtime/cpp/infer_paddle_openvino.cc +++ 
b/examples/runtime/cpp/infer_paddle_openvino.cc @@ -13,48 +13,58 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2/inference.pdmodel"; - std::string params_file = "mobilenetv2/inference.pdiparams"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz + std::string model_file = "pplcnet/inference.pdmodel"; + std::string params_file = "pplcnet/inference.pdiparams"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; - runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); + runtime_option.SetModelPath(model_file, params_file); runtime_option.UseOpenVINOBackend(); - runtime_option.SetCpuThreadNum(12); - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! " - << "\n--- Model: " << model_file << std::endl; - } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - //get input name - input_tensors[0].name = info.name; + // Use CPU to inference + // If need to configure OpenVINO backend for more option, we can configure runtime_option.openvino_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OpenVINOBackendOption.html + runtime_option.UseCpu(); + runtime_option.SetCpuThreadNum(12); - runtime->Infer(input_tensors, &output_tensors); + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); - output_tensors[0].PrintInfo(); + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; + } + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git a/examples/runtime/cpp/infer_paddle_paddle_inference.cc b/examples/runtime/cpp/infer_paddle_paddle_inference.cc index 0df04fafa..454534f93 100644 --- 
a/examples/runtime/cpp/infer_paddle_paddle_inference.cc +++ b/examples/runtime/cpp/infer_paddle_paddle_inference.cc @@ -13,53 +13,57 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2/inference.pdmodel"; - std::string params_file = "mobilenetv2/inference.pdiparams"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz + std::string model_file = "pplcnet/inference.pdmodel"; + std::string params_file = "pplcnet/inference.pdiparams"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; - runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); - // CPU + runtime_option.SetModelPath(model_file, params_file); runtime_option.UsePaddleInferBackend(); - runtime_option.SetCpuThreadNum(12); - // GPU - // runtime_option.UseGpu(0); - // IPU - // runtime_option.UseIpu(); - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! " - << "\n--- Model: " << model_file << std::endl; + runtime_option.UseCpu(); + + // If need to configure Paddle Inference backend for more option, we can configure runtime_option.paddle_infer_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1PaddleBackendOption.html + runtime_option.paddle_infer_option.enable_mkldnn = true; + + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); + + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - - //get input name - input_tensors[0].name = info.name; - - runtime->Infer(input_tensors, &output_tensors); - - output_tensors[0].PrintInfo(); + std::cout << std::endl; return 0; -} \ No newline at end of file +} diff --git 
a/examples/runtime/cpp/infer_paddle_paddle_inference_trt.cc b/examples/runtime/cpp/infer_paddle_paddle_inference_trt.cc new file mode 100644 index 000000000..d2dc2019d --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_paddle_inference_trt.cc @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" +#include + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz + std::string model_file = "pplcnet/inference.pdmodel"; + std::string params_file = "pplcnet/inference.pdiparams"; + + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html + fd::RuntimeOption runtime_option; + runtime_option.SetModelPath(model_file, params_file); + runtime_option.UsePaddleInferBackend(); + runtime_option.UseGpu(0); + + // Enable Paddle Inference + TensorRT + // If need to configure Paddle Inference backend for more option, we can configure runtime_option.paddle_infer_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1PaddleBackendOption.html + runtime_option.paddle_infer_option.enable_trt = true; + + // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html + // Use float16 inference to improve performance + runtime_option.trt_option.enable_fp16 = true; + // Cache trt engine to reduce time cost in model initialize + runtime_option.trt_option.serialize_file = "./pplcnet_model.trt"; + + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); + + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + std::cout << data_ptr[i] << " "; + } + std::cout << std::endl; + return 0; +} diff --git a/examples/runtime/cpp/infer_paddle_tensorrt.cc b/examples/runtime/cpp/infer_paddle_tensorrt.cc index 
04fe311b2..5e133f428 100644 --- a/examples/runtime/cpp/infer_paddle_tensorrt.cc +++ b/examples/runtime/cpp/infer_paddle_tensorrt.cc @@ -13,49 +13,61 @@ // limitations under the License. #include "fastdeploy/runtime.h" +#include namespace fd = fastdeploy; int main(int argc, char* argv[]) { - std::string model_file = "mobilenetv2/inference.pdmodel"; - std::string params_file = "mobilenetv2/inference.pdiparams"; + // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz + std::string model_file = "pplcnet/inference.pdmodel"; + std::string params_file = "pplcnet/inference.pdiparams"; - // setup option + // configure runtime + // How to configure by RuntimeOption, refer its api doc for more information + // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html fd::RuntimeOption runtime_option; - runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE); - runtime_option.UseGpu(0); + runtime_option.SetModelPath(model_file, params_file); runtime_option.UseTrtBackend(); - runtime_option.EnablePaddleToTrt(); - // init runtime - std::unique_ptr runtime = - std::unique_ptr(new fd::Runtime()); - if (!runtime->Init(runtime_option)) { - std::cerr << "--- Init FastDeploy Runitme Failed! " - << "\n--- Model: " << model_file << std::endl; - return -1; - } else { - std::cout << "--- Init FastDeploy Runitme Done! " - << "\n--- Model: " << model_file << std::endl; - } - // init input tensor shape - fd::TensorInfo info = runtime->GetInputInfo(0); - info.shape = {1, 3, 224, 224}; - - std::vector input_tensors(1); - std::vector output_tensors(1); - - std::vector inputs_data; - inputs_data.resize(1 * 3 * 224 * 224); - for (size_t i = 0; i < inputs_data.size(); ++i) { - inputs_data[i] = std::rand() % 1000 / 1000.0f; - } - input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data()); - //get input name - input_tensors[0].name = info.name; + // Use NVIDIA GPU to inference + // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option + // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html + runtime_option.UseGpu(0); + // Use float16 inference to improve performance + runtime_option.trt_option.enable_fp16 = true; + // Cache trt engine to reduce time cost in model initialize + runtime_option.trt_option.serialize_file = "./pplcnet_model.trt"; - runtime->Infer(input_tensors, &output_tensors); + fd::Runtime runtime; + assert(runtime.Init(runtime_option)); - output_tensors[0].PrintInfo(); + // Get model's inputs information + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html + std::vector inputs_info = runtime.GetInputInfos(); + + // Create dummy data fill with 0.5 + std::vector dummy_data(1 * 3 * 224 * 224, 0.5); + + // Create inputs/outputs tensors + std::vector inputs(inputs_info.size()); + std::vector outputs; + + // Initialize input tensors + // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html + inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data()); + inputs[0].name = inputs_info[0].name; + + // Inference + assert(runtime.Infer(inputs, &outputs)); + + // Print debug information of outputs + outputs[0].PrintInfo(); + + // Get data pointer and print it's elements + const float* data_ptr = reinterpret_cast(outputs[0].GetData()); + for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) { + 
    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
-}
\ No newline at end of file
+}
diff --git a/fastdeploy/runtime/backends/backend.h b/fastdeploy/runtime/backends/backend.h
index 7566806e5..8b940cc2d 100644
--- a/fastdeploy/runtime/backends/backend.h
+++ b/fastdeploy/runtime/backends/backend.h
@@ -85,10 +85,10 @@ class BaseBackend {
                          bool copy_to_fd = true) = 0;
   // Optional: For those backends which can share memory
   // while creating multiple inference engines with same model file
-  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
-                                             void *stream = nullptr,
+  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption& runtime_option,
+                                             void* stream = nullptr,
                                              int device_id = -1) {
-    FDERROR << "Clone no support" << std::endl;
+    FDERROR << "Clone is not supported by backend " << runtime_option.backend << " (stream=" << stream << ", device_id=" << device_id << ")" << std::endl;
     return nullptr;
   }
diff --git a/fastdeploy/runtime/backends/lite/option.h b/fastdeploy/runtime/backends/lite/option.h
index 70781d80f..dd76bf7df 100755
--- a/fastdeploy/runtime/backends/lite/option.h
+++ b/fastdeploy/runtime/backends/lite/option.h
@@ -53,32 +53,46 @@ enum LitePowerMode {
 struct LiteBackendOption {
   /// Paddle Lite power mode for mobile device.
   int power_mode = 3;
-  /// Number of threads while use CPU
+  // Number of threads while using CPU
   int cpu_threads = 1;
   /// Enable use half precision
   bool enable_fp16 = false;
-  /// Inference device, Paddle Lite support CPU/KUNLUNXIN/TIMVX/ASCEND
+  // Inference device, Paddle Lite supports CPU/KUNLUNXIN/TIMVX/ASCEND
   Device device = Device::CPU;
-  /// Index of inference device
+  // Index of inference device
   int device_id = 0;
+  /// kunlunxin_l3_workspace_size
   int kunlunxin_l3_workspace_size = 0xfffc00;
+  /// kunlunxin_locked
   bool kunlunxin_locked = false;
+  /// kunlunxin_autotune
   bool kunlunxin_autotune = true;
+  /// kunlunxin_autotune_file
   std::string kunlunxin_autotune_file = "";
+  /// kunlunxin_precision
   std::string kunlunxin_precision = "int16";
+  /// kunlunxin_adaptive_seqlen
   bool kunlunxin_adaptive_seqlen = false;
+  /// kunlunxin_enable_multi_stream
   bool kunlunxin_enable_multi_stream = false;
   /// Optimized model dir for CxxConfig
   std::string optimized_model_dir = "";
+  /// nnadapter_subgraph_partition_config_path
   std::string nnadapter_subgraph_partition_config_path = "";
+  /// nnadapter_subgraph_partition_config_buffer
   std::string nnadapter_subgraph_partition_config_buffer = "";
+  /// nnadapter_context_properties
   std::string nnadapter_context_properties = "";
+  /// nnadapter_model_cache_dir
   std::string nnadapter_model_cache_dir = "";
+  /// nnadapter_mixed_precision_quantization_config_path
   std::string nnadapter_mixed_precision_quantization_config_path = "";
+  /// nnadapter_dynamic_shape_info
   std::map<std::string, std::vector<std::vector<int64_t>>>
       nnadapter_dynamic_shape_info = {{"", {{0}}}};
+  /// nnadapter_device_names
   std::vector<std::string> nnadapter_device_names = {};
 };
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/ort/option.h b/fastdeploy/runtime/backends/ort/option.h
index 9487e5da9..1509fe0bb 100644
--- a/fastdeploy/runtime/backends/ort/option.h
+++ b/fastdeploy/runtime/backends/ort/option.h
@@ -25,27 +25,18 @@ namespace fastdeploy {
 /*! @brief Option object to configure ONNX Runtime backend
  */
 struct OrtBackendOption {
-  /*
-   * @brief Level of graph optimization, -1: mean default(Enable all the optimization strategy)/0: disable all the optimization strategy/1: enable basic strategy/2:enable extend strategy/99: enable all
-   */
+  /// Level of graph optimization, -1: default (enable all optimization strategies) / 0: disable all optimizations / 1: enable basic optimizations / 2: enable extended optimizations / 99: enable all
   int graph_optimization_level = -1;
-  /*
-   * @brief Number of threads to execute the operator, -1: default
-   */
+  /// Number of threads used to execute an operator, -1: default
   int intra_op_num_threads = -1;
-  /*
-   * @brief Number of threads to execute the graph, -1: default. This parameter only will bring effects while the `OrtBackendOption::execution_mode` set to 1.
-   */
+  /// Number of threads used to execute the graph, -1: default. This parameter only takes effect when `OrtBackendOption::execution_mode` is set to 1.
   int inter_op_num_threads = -1;
-  /*
-   * @brief Execution mode for the graph, -1: default(Sequential mode)/0: Sequential mode, execute the operators in graph one by one. /1: Parallel mode, execute the operators in graph parallelly.
-   */
+  /// Execution mode for the graph, -1: default (sequential mode) / 0: sequential mode, execute the operators in the graph one by one / 1: parallel mode, execute the operators in the graph in parallel
   int execution_mode = -1;
-  /// Inference device, OrtBackend supports CPU/GPU
+  // Inference device, OrtBackend supports CPU/GPU
   Device device = Device::CPU;
-  /// Inference device id
+  // Inference device id
   int device_id = 0;
-
   void* external_stream_ = nullptr;
 };
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/paddle/option.h b/fastdeploy/runtime/backends/paddle/option.h
index 6802b8da4..b05cc6f6f 100755
--- a/fastdeploy/runtime/backends/paddle/option.h
+++ b/fastdeploy/runtime/backends/paddle/option.h
@@ -54,6 +54,8 @@ struct PaddleBackendOption {
   bool enable_mkldnn = true;
   /// Use Paddle Inference + TensorRT to inference model on GPU
   bool enable_trt = false;
+  /// Whether to enable memory optimization, default true
+  bool enable_memory_optimize = true;
 
   /*
    * @brief IPU option, this will configure the IPU hardware, if inference model in IPU
diff --git a/fastdeploy/runtime/backends/paddle/option_pybind.cc b/fastdeploy/runtime/backends/paddle/option_pybind.cc
index 60b66e672..46e6a94b7 100644
--- a/fastdeploy/runtime/backends/paddle/option_pybind.cc
+++ b/fastdeploy/runtime/backends/paddle/option_pybind.cc
@@ -41,6 +41,7 @@ void BindPaddleOption(pybind11::module& m) {
       .def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info)
       .def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn)
       .def_readwrite("enable_trt", &PaddleBackendOption::enable_trt)
+      .def_readwrite("enable_memory_optimize", &PaddleBackendOption::enable_memory_optimize)
       .def_readwrite("ipu_option", &PaddleBackendOption::ipu_option)
       .def_readwrite("collect_trt_shape",
                      &PaddleBackendOption::collect_trt_shape)
diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index bfe122f97..e817f272b 100644
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -147,7 +147,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
   }
   config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
                          params_buffer.c_str(), params_buffer.size());
-  config_.EnableMemoryOptim();
+  if (option.enable_memory_optimize) {
+    config_.EnableMemoryOptim();
+  }
   BuildOption(option);
 
   // The input/output information get from predictor is not right, use
diff --git a/fastdeploy/runtime/backends/tensorrt/option.h b/fastdeploy/runtime/backends/tensorrt/option.h
index ff28e3e3b..2825076b1 100755
--- a/fastdeploy/runtime/backends/tensorrt/option.h
+++ b/fastdeploy/runtime/backends/tensorrt/option.h
@@ -33,9 +33,8 @@ struct TrtBackendOption {
   /// Enable log while converting onnx model to tensorrt
   bool enable_log_info = false;
-  /*
-   * @brief Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
-   */
+
+  /// Enable half precision inference; on devices that do not support half precision, it falls back to float32 mode
   bool enable_fp16 = false;
 
   /** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend
@@ -64,9 +63,7 @@
       max_shape[tensor_name].assign(max.begin(), max.end());
     }
   }
-  /**
-   * @brief Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again
-   */
+  /// Set the cache file path when using the TensorRT backend. Loading a Paddle/ONNX model and initializing TensorRT take a long time; with this option the TensorRT engine is saved to `serialize_file` and loaded directly when the code runs again
   std::string serialize_file = "";
 
   // The below parameters may be removed in next version, please do not
diff --git a/scripts/build_bcloud_lib.py b/scripts/build_bcloud_lib.py
index aec0d21f5..7e11357e1 100644
--- a/scripts/build_bcloud_lib.py
+++ b/scripts/build_bcloud_lib.py
@@ -22,4 +22,4 @@ for root, dirs, files in os.walk(third_libs):
         for f in files:
             if f.strip().count(".so") > 0 or f.strip() == "plugins.xml":
                 full_path = os.path.join(root, f)
-                shutil.copy(full_path, os.path.join(bc_dirname, "lib"))
+                shutil.copy(full_path, os.path.join(bc_dirname, "lib"), follow_symlinks=False)
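
Note: none of the updated examples exercises the new paddle_infer_option.enable_memory_optimize flag added above. The snippet below is a minimal sketch of how the flag could be toggled; it is not part of the patch, assumes the FastDeploy headers from this change, mirrors the style of the examples in this change, and uses placeholder model paths.

// Minimal sketch (not part of the patch): toggling the new memory-optimize option.
#include "fastdeploy/runtime.h"
#include <cassert>

namespace fd = fastdeploy;

int main() {
  fd::RuntimeOption option;
  // Placeholder paths; any Paddle inference model works here.
  option.SetModelPath("model/inference.pdmodel", "model/inference.pdiparams");
  option.UsePaddleInferBackend();
  option.UseCpu();
  // Options touched by this patch: keep MKLDNN on, and switch off the
  // memory-optimization pass (default true) explicitly if desired.
  option.paddle_infer_option.enable_mkldnn = true;
  option.paddle_infer_option.enable_memory_optimize = false;

  fd::Runtime runtime;
  assert(runtime.Init(option));
  return 0;
}

From Python, the same flag should be reachable through the new pybind binding as runtime_option.paddle_infer_option.enable_memory_optimize.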