Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Other] Unify initialize api for lite/trt backend (#1249)
* Unify initialize api for lite/trt backend
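In effect, both backends now expose the same single entry point, bool Init(const RuntimeOption&), and validate device, model format, and from-memory support themselves. A minimal caller-side sketch of what that convention allows (the InitAnyBackend helper below is illustrative, not part of FastDeploy):

// Illustrative only: with a uniform Init(const RuntimeOption&) convention,
// dispatch code no longer needs per-backend InitFromPaddle/InitFromOnnx calls.
#include "fastdeploy/runtime/runtime_option.h"

template <typename Backend>
bool InitAnyBackend(Backend* backend, const fastdeploy::RuntimeOption& option) {
  // Each backend checks its own supported devices/formats inside Init() and
  // returns false (after logging an FDERROR) on a mismatch.
  return backend->Init(option);
}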
@@ -56,18 +56,39 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   }
 }
 
-bool LiteBackend::InitFromPaddle(const std::string& model_file,
-                                 const std::string& params_file,
-                                 const LiteBackendOption& option) {
+bool LiteBackend::Init(const RuntimeOption& runtime_option) {
   if (initialized_) {
     FDERROR << "LiteBackend is already initialized, cannot initialize again."
             << std::endl;
     return false;
   }
 
-  config_.set_model_file(model_file);
-  config_.set_param_file(params_file);
-  BuildOption(option);
+  if (runtime_option.model_format != ModelFormat::PADDLE) {
+    FDERROR
+        << "PaddleLiteBackend only supports model format PADDLE, but now it's "
+        << runtime_option.model_format << "." << std::endl;
+    return false;
+  }
+  if (runtime_option.device != Device::CPU &&
+      runtime_option.device != Device::KUNLUNXIN &&
+      runtime_option.device != Device::ASCEND &&
+      runtime_option.device != Device::TIMVX) {
+    FDERROR << "PaddleLiteBackend only supports "
+               "Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND, "
+               "but now it's "
+            << runtime_option.device << "." << std::endl;
+    return false;
+  }
+  if (runtime_option.model_from_memory_) {
+    FDERROR << "PaddleLiteBackend doesn't support load model from memory, "
+               "please load model from disk."
+            << std::endl;
+    return false;
+  }
+
+  config_.set_model_file(runtime_option.model_file);
+  config_.set_param_file(runtime_option.params_file);
+  BuildOption(runtime_option.paddle_lite_option);
   predictor_ =
       paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::CxxConfig>(
          config_);
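For orientation, a rough sketch of how a caller would satisfy the checks above (header path and direct backend use are only for illustration; normally Runtime drives this):

// Illustrative sketch (lite backend header path assumed): build a
// RuntimeOption that passes the checks in LiteBackend::Init above --
// Paddle-format model, loaded from disk, on a supported device.
#include "fastdeploy/runtime/backends/lite/lite_backend.h"
#include "fastdeploy/runtime/runtime_option.h"

bool DemoInitLite() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // ModelFormat::PADDLE
  option.UseCpu();  // CPU, TIMVX, KUNLUNXIN or ASCEND pass the device check
  fastdeploy::LiteBackend backend;
  // Returns false (with an FDERROR already printed) if any check fails.
  return backend.Init(option);
}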
@@ -177,7 +198,7 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
       FDASSERT(false, "Unexpected data type of %d.", inputs[i].dtype);
     }
   }
 
   RUNTIME_PROFILE_LOOP_BEGIN(1)
   predictor_->Run();
   RUNTIME_PROFILE_LOOP_END
@@ -22,6 +22,7 @@
 #include "paddle_api.h" // NOLINT
 
 #include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/runtime_option.h"
 #include "fastdeploy/runtime/backends/lite/option.h"
 
 namespace fastdeploy {
@@ -30,11 +31,8 @@ class LiteBackend : public BaseBackend {
  public:
   LiteBackend() {}
   virtual ~LiteBackend() = default;
-  void BuildOption(const LiteBackendOption& option);
 
-  bool InitFromPaddle(const std::string& model_file,
-                      const std::string& params_file,
-                      const LiteBackendOption& option = LiteBackendOption());
+  bool Init(const RuntimeOption& option);
 
   bool Infer(std::vector<FDTensor>& inputs,
              std::vector<FDTensor>* outputs,
@@ -50,6 +48,8 @@ class LiteBackend : public BaseBackend {
   std::vector<TensorInfo> GetOutputInfos() override;
 
  private:
+  void BuildOption(const LiteBackendOption& option);
+
   void ConfigureCpu(const LiteBackendOption& option);
   void ConfigureTimvx(const LiteBackendOption& option);
   void ConfigureAscend(const LiteBackendOption& option);
@@ -113,6 +113,50 @@ bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
   return true;
 }
 
+bool TrtBackend::Init(const RuntimeOption& runtime_option) {
+  if (runtime_option.device != Device::GPU) {
+    FDERROR << "TrtBackend only supports Device::GPU, but now it's "
+            << runtime_option.device << "." << std::endl;
+    return false;
+  }
+  if (runtime_option.model_format != ModelFormat::PADDLE &&
+      runtime_option.model_format != ModelFormat::ONNX) {
+    FDERROR
+        << "TrtBackend only supports model format PADDLE/ONNX, but now it's "
+        << runtime_option.model_format << "." << std::endl;
+    return false;
+  }
+  if (runtime_option.model_format == ModelFormat::PADDLE) {
+    if (runtime_option.model_from_memory_) {
+      return InitFromPaddle(runtime_option.model_file,
+                            runtime_option.params_file,
+                            runtime_option.trt_option);
+    } else {
+      std::string model_buffer;
+      std::string params_buffer;
+      FDASSERT(ReadBinaryFromFile(runtime_option.model_file, &model_buffer),
+               "Failed to read model file %s.",
+               runtime_option.model_file.c_str());
+      FDASSERT(ReadBinaryFromFile(runtime_option.params_file, &params_buffer),
+               "Failed to read parameters file %s.",
+               runtime_option.params_file.c_str());
+      return InitFromPaddle(model_buffer, params_buffer,
+                            runtime_option.trt_option);
+    }
+  } else {
+    if (runtime_option.model_from_memory_) {
+      return InitFromOnnx(runtime_option.model_file, runtime_option.trt_option);
+    } else {
+      std::string model_buffer;
+      FDASSERT(ReadBinaryFromFile(runtime_option.model_file, &model_buffer),
+               "Failed to read model file %s.",
+               runtime_option.model_file.c_str());
+      return InitFromOnnx(model_buffer, runtime_option.trt_option);
+    }
+  }
+  return true;
+}
+
 bool TrtBackend::InitFromPaddle(const std::string& model_buffer,
                                 const std::string& params_buffer,
                                 const TrtBackendOption& option, bool verbose) {
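Note on the branches above: when model_from_memory_ is set, runtime_option.model_file / params_file already hold the serialized model bytes rather than paths, so Init forwards them to InitFromPaddle/InitFromOnnx unchanged; otherwise it reads the files itself. A rough sketch of how a caller ends up in either branch (SetModelBuffer is assumed here to be how the from-memory flag gets set):

// Rough sketch of the two entry paths into TrtBackend::Init above.
#include <string>
#include "fastdeploy/runtime/runtime_option.h"

void DemoTrtOptions(const std::string& onnx_bytes) {
  // Disk path: Init() reads model.onnx itself, then calls InitFromOnnx(buffer, ...).
  fastdeploy::RuntimeOption from_disk;
  from_disk.UseGpu(0);  // TrtBackend::Init rejects anything but Device::GPU
  from_disk.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);

  // From-memory path (SetModelBuffer assumed): the flag is set and model_file
  // carries the bytes, which Init() passes straight to InitFromOnnx.
  fastdeploy::RuntimeOption from_memory;
  from_memory.UseGpu(0);
  from_memory.SetModelBuffer(onnx_bytes, "", fastdeploy::ModelFormat::ONNX);
}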
@@ -291,14 +335,14 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
   cudaSetDevice(option_.gpu_id);
   SetInputs(inputs);
   AllocateOutputsBuffer(outputs, copy_to_fd);
 
   RUNTIME_PROFILE_LOOP_BEGIN(1)
   if (!context_->enqueueV2(bindings_.data(), stream_, nullptr)) {
     FDERROR << "Failed to Infer with TensorRT." << std::endl;
     return false;
   }
   RUNTIME_PROFILE_LOOP_END
 
   for (size_t i = 0; i < outputs->size(); ++i) {
     // if the final output tensor's dtype is different from the model output
     // tensor's dtype, then we need cast the data to the final output's dtype
@@ -70,14 +70,8 @@ FDDataType GetFDDataType(const nvinfer1::DataType& dtype);
 
 class TrtBackend : public BaseBackend {
  public:
   TrtBackend() : engine_(nullptr), context_(nullptr) {}
-  void BuildOption(const TrtBackendOption& option);
 
-  bool InitFromPaddle(const std::string& model_buffer,
-                      const std::string& params_buffer,
-                      const TrtBackendOption& option = TrtBackendOption(),
-                      bool verbose = false);
-  bool InitFromOnnx(const std::string& model_buffer,
-                    const TrtBackendOption& option = TrtBackendOption());
+  bool Init(const RuntimeOption& runtime_option);
   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
@@ -98,6 +92,15 @@ class TrtBackend : public BaseBackend {
   }
 
  private:
+  void BuildOption(const TrtBackendOption& option);
+
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
+                      const TrtBackendOption& option = TrtBackendOption(),
+                      bool verbose = false);
+  bool InitFromOnnx(const std::string& model_buffer,
+                    const TrtBackendOption& option = TrtBackendOption());
+
   TrtBackendOption option_;
   std::shared_ptr<nvinfer1::ICudaEngine> engine_;
   std::shared_ptr<nvinfer1::IExecutionContext> context_;