[Other] Optimize poros backend (#1331)

* Optimize poros backend
* Fix pybind error

---------

Co-authored-by: root <root@bjyz-sys-gpu-kongming3.bjyz.baidu.com>
2025-10-05 16:48:03 +08:00 · 2023-02-17 14:32:35 +08:00
parent ee41944f47
commit db471c3466
6 changed files with 66 additions and 45 deletions
--- a/examples/runtime/cpp/infer_torchscript_poros.cc
+++ b/examples/runtime/cpp/infer_torchscript_poros.cc
@@ -84,11 +84,13 @@ int main(int argc, char* argv[]) {
  runtime_option.SetModelPath(model_file, "", fd::ModelFormat::TORCHSCRIPT);
  runtime_option.UsePorosBackend();
  runtime_option.UseGpu(0);
-  runtime_option.is_dynamic = true;

  // Compile runtime
  std::unique_ptr<fd::Runtime> runtime =
      std::unique_ptr<fd::Runtime>(new fd::Runtime());
+
+  runtime->Init(runtime_option);
+
  if (!runtime->Compile(prewarm_datas, runtime_option)) {
    std::cerr << "--- Init FastDeploy Runitme Failed! "
              << "\n--- Model:  " << model_file << std::endl;
--- a/fastdeploy/pybind/runtime.cc
+++ b/fastdeploy/pybind/runtime.cc
@@ -51,7 +51,7 @@ void BindRuntime(pybind11::module& m) {
                        warm_datas[i][j].nbytes());
               }
             }
-             return self.Compile(warm_tensors, _option);
+             return self.Compile(warm_tensors);
           })
      .def("infer",
           [](Runtime& self, std::map<std::string, pybind11::array>& data) {
--- a/fastdeploy/runtime/backends/backend.h
+++ b/fastdeploy/runtime/backends/backend.h
@@ -58,7 +58,10 @@ class BaseBackend {
  virtual bool Initialized() const { return initialized_; }

  virtual bool Init(const RuntimeOption& option) {
-    FDERROR << "Not Implement Yet." << std::endl;
+    FDERROR << "Not Implement for "
+            << option.backend << " in "
+            << option.device << "."
+            << std::endl;
    return false;
  }

--- a/fastdeploy/runtime/backends/poros/poros_backend.h
+++ b/fastdeploy/runtime/backends/poros/poros_backend.h
@@ -51,6 +51,20 @@ class PorosBackend : public BaseBackend {

  void BuildOption(const PorosBackendOption& option);

+  bool Init(const RuntimeOption& option) {
+    if (!(Supported(option.model_format, Backend::POROS)
+        && Supported(option.device, Backend::POROS))) {
+      return false;
+    }
+    if (option.model_from_memory_) {
+      FDERROR << "Poros backend doesn't support load model "
+              << "from memory, please load model from disk."
+              << std::endl;
+      return false;
+    }
+    return true;
+  }
+
  bool Compile(const std::string& model_file,
               std::vector<std::vector<FDTensor>>& prewarm_tensors,
               const PorosBackendOption& option = PorosBackendOption());
--- a/fastdeploy/runtime/runtime.cc
+++ b/fastdeploy/runtime/runtime.cc
@@ -417,25 +417,28 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
  return runtime;
 }

-// only for poros backend
-bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-                      const RuntimeOption& _option) {
+void Runtime::CreatePorosBackend() {
 #ifdef ENABLE_POROS_BACKEND
-  FDASSERT(
-      option.model_format == ModelFormat::TORCHSCRIPT,
-      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
-  if (option.device != Device::CPU && option.device != Device::GPU) {
-    FDERROR << "PorosBackend only supports CPU/GPU, but now its "
-            << option.device << "." << std::endl;
-    return false;
+  backend_ = utils::make_unique<PorosBackend>();
+  FDASSERT(backend_->Init(option), "Failed to initialize Poros backend.");
+#else
+  FDASSERT(false,
+           "PorosBackend is not available, please compiled with "
+           "ENABLE_POROS_BACKEND=ON.");
+#endif
+  FDINFO << "Runtime initialized with Backend::POROS in " << option.device
+         << "." << std::endl;
 }
+
+// only for poros backend
+bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors) {
+#ifdef ENABLE_POROS_BACKEND
  option.poros_option.device = option.device;
  option.poros_option.device_id = option.device_id;
  option.poros_option.enable_fp16 = option.trt_option.enable_fp16;
  option.poros_option.max_batch_size = option.trt_option.max_batch_size;
  option.poros_option.max_workspace_size = option.trt_option.max_workspace_size;

-  backend_ = utils::make_unique<PorosBackend>();
  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
  FDASSERT(
      casted_backend->Compile(option.model_file, prewarm_tensors,
--- a/fastdeploy/runtime/runtime.h
+++ b/fastdeploy/runtime/runtime.h
@@ -99,11 +99,9 @@ struct FASTDEPLOY_DECL Runtime {
  /** \brief Compile TorchScript Module, only for Poros backend
   *
   * \param[in] prewarm_tensors Prewarm datas for compile
-   * \param[in] _option Runtime option
   * \return true if compile successed, otherwise false
   */
-  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-               const RuntimeOption& _option);
+  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors);
  /** \brief Get profile time of Runtime after the profile process is done.
   */
  double GetProfileTime() {
@@ -118,6 +116,7 @@ struct FASTDEPLOY_DECL Runtime {
  void CreateLiteBackend();
  void CreateRKNPU2Backend();
  void CreateSophgoNPUBackend();
+  void CreatePorosBackend();
  std::unique_ptr<BaseBackend> backend_;
  std::vector<FDTensor> input_tensors_;
  std::vector<FDTensor> output_tensors_;