Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 16:48:03 +08:00
[Other] Optimize poros backend (#1331)

* Optimize poros backend
* Fix pybind error

Co-authored-by: root <root@bjyz-sys-gpu-kongming3.bjyz.baidu.com>
@@ -84,11 +84,13 @@ int main(int argc, char* argv[]) {
   runtime_option.SetModelPath(model_file, "", fd::ModelFormat::TORCHSCRIPT);
   runtime_option.UsePorosBackend();
   runtime_option.UseGpu(0);
-  runtime_option.is_dynamic = true;
 
   // Compile runtime
   std::unique_ptr<fd::Runtime> runtime =
       std::unique_ptr<fd::Runtime>(new fd::Runtime());
+
+  runtime->Init(runtime_option);
+
   if (!runtime->Compile(prewarm_datas, runtime_option)) {
     std::cerr << "--- Init FastDeploy Runitme Failed! "
               << "\n--- Model: " << model_file << std::endl;
@@ -114,4 +116,4 @@ int main(int argc, char* argv[]) {
 
   output_tensors[0].PrintInfo();
   return 0;
 }
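With this commit, the example's flow becomes: configure the RuntimeOption, Init() the Runtime, then Compile() with the prewarm data. Read together with the runtime.h hunk later in this diff (where Compile() drops its RuntimeOption parameter), the intended end-to-end usage looks roughly like the sketch below. The model path and the construction of prewarm_datas are placeholders, not part of this commit, and note that the example's own call site above still passes runtime_option to Compile() as unchanged context.

#include <iostream>
#include <string>
#include <vector>

#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

int main() {
  std::string model_file = "model.pt";  // placeholder TorchScript model path
  // Prewarm batches for Poros compilation; filled with real inputs in practice.
  std::vector<std::vector<fd::FDTensor>> prewarm_datas;

  fd::RuntimeOption runtime_option;
  runtime_option.SetModelPath(model_file, "", fd::ModelFormat::TORCHSCRIPT);
  runtime_option.UsePorosBackend();
  runtime_option.UseGpu(0);

  fd::Runtime runtime;
  // New step introduced by this commit: Init() creates the Poros backend
  // and validates the option before any compilation happens.
  if (!runtime.Init(runtime_option)) {
    std::cerr << "Runtime Init failed." << std::endl;
    return -1;
  }
  // Compile() now only needs the prewarm tensors (per the new declaration).
  if (!runtime.Compile(prewarm_datas)) {
    std::cerr << "Runtime Compile failed." << std::endl;
    return -1;
  }
  return 0;
}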
@@ -51,7 +51,7 @@ void BindRuntime(pybind11::module& m) {
                        warm_datas[i][j].nbytes());
            }
          }
-         return self.Compile(warm_tensors, _option);
+         return self.Compile(warm_tensors);
        })
       .def("infer",
            [](Runtime& self, std::map<std::string, pybind11::array>& data) {
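For context, the hunk above sits inside the "compile" binding of BindRuntime. Below is a reconstructed, self-contained sketch of that binding pattern; only the lines in the hunk are verbatim, and the rest (including whether the binding still accepts a RuntimeOption argument from Python) is an assumption. NumpyDataTypeToFDDataType is FastDeploy's existing numpy-dtype helper from its pybind utilities.

#include <cstring>
#include <vector>

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include "fastdeploy/pybind/main.h"  // Runtime, FDTensor, dtype helpers

namespace fastdeploy {
void BindCompileSketch(pybind11::class_<Runtime>& runtime_cls) {
  runtime_cls.def(
      "compile",
      [](Runtime& self,
         std::vector<std::vector<pybind11::array>>& warm_datas) {
        std::vector<std::vector<FDTensor>> warm_tensors(warm_datas.size());
        for (size_t i = 0; i < warm_datas.size(); ++i) {
          warm_tensors[i].resize(warm_datas[i].size());
          for (size_t j = 0; j < warm_datas[i].size(); ++j) {
            auto dtype = NumpyDataTypeToFDDataType(warm_datas[i][j].dtype());
            std::vector<int64_t> shape(
                warm_datas[i][j].shape(),
                warm_datas[i][j].shape() + warm_datas[i][j].ndim());
            // Allocate the FDTensor, then copy the numpy buffer into it.
            warm_tensors[i][j].Resize(shape, dtype);
            std::memcpy(warm_tensors[i][j].MutableData(),
                        warm_datas[i][j].mutable_data(),
                        warm_datas[i][j].nbytes());
          }
        }
        // The option argument is gone: Compile() now takes prewarm data only.
        return self.Compile(warm_tensors);
      });
}
}  // namespace fastdeploy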
@@ -58,7 +58,10 @@ class BaseBackend {
   virtual bool Initialized() const { return initialized_; }
 
   virtual bool Init(const RuntimeOption& option) {
-    FDERROR << "Not Implement Yet." << std::endl;
+    FDERROR << "Not Implement for "
+            << option.backend << " in "
+            << option.device << "."
+            << std::endl;
     return false;
   }
 
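The improved default error message hints at the contract this commit establishes: a concrete backend overrides the virtual Init(), validates the RuntimeOption, and returns false rather than aborting when it cannot serve the request. A minimal sketch of that contract follows; DemoBackend and its check are hypothetical, modeled on the PorosBackend::Init() added later in this diff.

#include "fastdeploy/runtime/backends/backend.h"

namespace fastdeploy {
class DemoBackend : public BaseBackend {
 public:
  bool Init(const RuntimeOption& option) override {
    if (option.model_format != ModelFormat::TORCHSCRIPT) {
      FDERROR << "DemoBackend only supports TORCHSCRIPT models." << std::endl;
      return false;  // Runtime surfaces this as an init failure
    }
    initialized_ = true;  // makes BaseBackend::Initialized() report true
    return true;
  }
};
}  // namespace fastdeploy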
@@ -89,59 +92,59 @@ class BaseBackend {
     return nullptr;
   }
 
   benchmark::BenchmarkOption benchmark_option_;
   benchmark::BenchmarkResult benchmark_result_;
 };
 
 /** \brief Macros for Runtime benchmark profiling.
  * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN'
  * indicates that the least number of times the loop
  * will repeat when profiling mode is not enabled.
  * In most cases, the value should be 1, i.e., results are
  * obtained by running the inference process once, when
  * the profile mode is turned off, such as ONNX Runtime,
  * OpenVINO, TensorRT, Paddle Inference, Paddle Lite,
  * RKNPU2, SOPHGO etc.
  *
  * example code @code
  * // OpenVINOBackend::Infer
  * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  * // do something ....
  * RUNTIME_PROFILE_LOOP_BEGIN(1)
  * // The codes which wrapped by 'BEGIN(1) ~ END' scope
  * // will only run once when profiling mode is not enabled.
  * request_.infer();
  * RUNTIME_PROFILE_LOOP_END
  * // do something ....
  * RUNTIME_PROFILE_LOOP_H2D_D2H_END
  *
  * @endcode In this case, No global variables inside a function
  * are wrapped by BEGIN and END, which may be required for
  * subsequent tasks. But, some times we need to set 'base_loop'
  * as 0, such as POROS.
  *
  * * example code @code
  * // PorosBackend::Infer
  * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
  * // do something ....
  * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' as 0
  * // The codes which wrapped by 'BEGIN(0) ~ END' scope
  * // will not run when profiling mode is not enabled.
  * auto poros_outputs = _poros_module->forward(poros_inputs);
  * RUNTIME_PROFILE_LOOP_END
  * // Run another inference beyond the scope of 'BEGIN ~ END'
  * // to get valid outputs for subsequent tasks.
  * auto poros_outputs = _poros_module->forward(poros_inputs);
  * // do something .... will use 'poros_outputs' ...
  * if (poros_outputs.isTensor()) {
  * // ...
  * }
  * RUNTIME_PROFILE_LOOP_H2D_D2H_END
  *
  * @endcode In this case, 'poros_outputs' inside a function
  * are wrapped by BEGIN and END, which may be required for
  * subsequent tasks. So, we set 'base_loop' as 0 and lanuch
  * another infer to get the valid outputs beyond the scope
  * of 'BEGIN ~ END' for subsequent tasks.
  */
 
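The base_loop semantics described in that comment reduce to a loop bound that depends on profiling mode. The following is an illustration of the idea only, not FastDeploy's actual macro definition (the real macros also record H2D/D2H timings); profiling_enabled_ and repeats_ are assumed member names.

// Illustration (assumed shape, not the real macro): run 'repeats_' times
// when profiling, otherwise 'base_loop' times, so a BEGIN(0)...END body
// is skipped entirely in normal (non-profiling) runs.
#define RUNTIME_PROFILE_LOOP_BEGIN(base_loop)                          \
  for (int _i = 0, _n = (profiling_enabled_ ? repeats_ : (base_loop)); \
       _i < _n; ++_i) {
#define RUNTIME_PROFILE_LOOP_END }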
@@ -51,6 +51,20 @@ class PorosBackend : public BaseBackend {
 
   void BuildOption(const PorosBackendOption& option);
 
+  bool Init(const RuntimeOption& option) {
+    if (!(Supported(option.model_format, Backend::POROS)
+          && Supported(option.device, Backend::POROS))) {
+      return false;
+    }
+    if (option.model_from_memory_) {
+      FDERROR << "Poros backend doesn't support load model "
+              << "from memory, please load model from disk."
+              << std::endl;
+      return false;
+    }
+    return true;
+  }
+
   bool Compile(const std::string& model_file,
                std::vector<std::vector<FDTensor>>& prewarm_tensors,
                const PorosBackendOption& option = PorosBackendOption());
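Note the division of labor this sets up: PorosBackend::Init() only validates the request (supported model format and device, model not loaded from memory), while the TorchScript module is still compiled by Compile() with the prewarm tensors. That split is what lets Runtime create and initialize the backend through the generic Init path in the next hunk, keeping compilation as a separate, explicit step.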
@@ -417,25 +417,28 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
   return runtime;
 }
 
-// only for poros backend
-bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-                      const RuntimeOption& _option) {
+void Runtime::CreatePorosBackend() {
+#ifdef ENABLE_POROS_BACKEND
+  backend_ = utils::make_unique<PorosBackend>();
+  FDASSERT(backend_->Init(option), "Failed to initialize Poros backend.");
+#else
+  FDASSERT(false,
+           "PorosBackend is not available, please compiled with "
+           "ENABLE_POROS_BACKEND=ON.");
+#endif
+  FDINFO << "Runtime initialized with Backend::POROS in " << option.device
+         << "." << std::endl;
+}
+
+// only for poros backend
+bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors) {
 #ifdef ENABLE_POROS_BACKEND
-  FDASSERT(
-      option.model_format == ModelFormat::TORCHSCRIPT,
-      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
-  if (option.device != Device::CPU && option.device != Device::GPU) {
-    FDERROR << "PorosBackend only supports CPU/GPU, but now its "
-            << option.device << "." << std::endl;
-    return false;
-  }
   option.poros_option.device = option.device;
   option.poros_option.device_id = option.device_id;
   option.poros_option.enable_fp16 = option.trt_option.enable_fp16;
   option.poros_option.max_batch_size = option.trt_option.max_batch_size;
   option.poros_option.max_workspace_size = option.trt_option.max_workspace_size;
 
-  backend_ = utils::make_unique<PorosBackend>();
   auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
   FDASSERT(
       casted_backend->Compile(option.model_file, prewarm_tensors,
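Two things change here beyond the signature: the model-format and device validation that Compile() used to perform inline now lives in PorosBackend::Init() (asserted inside CreatePorosBackend()), and backend construction itself moves out of Compile(), so Poros follows the same Create*Backend() pattern as the other backends declared in runtime.h below.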
@@ -99,11 +99,9 @@ struct FASTDEPLOY_DECL Runtime {
   /** \brief Compile TorchScript Module, only for Poros backend
    *
    * \param[in] prewarm_tensors Prewarm datas for compile
-   * \param[in] _option Runtime option
    * \return true if compile successed, otherwise false
    */
-  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-               const RuntimeOption& _option);
+  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors);
   /** \brief Get profile time of Runtime after the profile process is done.
    */
   double GetProfileTime() {
@@ -118,6 +116,7 @@ struct FASTDEPLOY_DECL Runtime {
   void CreateLiteBackend();
   void CreateRKNPU2Backend();
   void CreateSophgoNPUBackend();
+  void CreatePorosBackend();
   std::unique_ptr<BaseBackend> backend_;
   std::vector<FDTensor> input_tensors_;
   std::vector<FDTensor> output_tensors_;