Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 16:48:03 +08:00
[Other] Optimize runtime module (#1356)
* Optimize runtime
* fix error
* [Backend] Add option to print tensorrt conversion log (#1386)

Co-authored-by: root <root@bjyz-sys-gpu-kongming3.bjyz.baidu.com>
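For context on the new TensorRT logging switch, here is a minimal usage sketch. It is not part of the commit: it assumes the standard FastDeploy C++ API (RuntimeOption::UseGpu, UseTrtBackend, SetModelPath and Runtime::Init) and an ONNX model served through the TensorRT backend; only the enable_log_info line exercises the code added in this diff.

    // Minimal sketch: turn on the TensorRT conversion log added in this commit.
    #include "fastdeploy/runtime.h"

    int main() {
      fastdeploy::RuntimeOption option;
      option.UseGpu(0);
      option.UseTrtBackend();
      option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);

      // New flag: forward TensorRT's INFO/WARNING messages while the ONNX
      // graph is parsed and the engine is built.
      option.trt_option.enable_log_info = true;

      fastdeploy::Runtime runtime;
      if (!runtime.Init(option)) {
        return -1;
      }
      return 0;
    }

The pybind change below exposes the same flag on TrtBackendOption in the Python bindings.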
@@ -98,6 +98,33 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
 }
 }
 
+bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
+  if (!(Supported(runtime_option.model_format, Backend::PDINFER) && Supported(runtime_option.device, Backend::PDINFER))) {
+    return false;
+  }
+
+  auto option = runtime_option;
+  option.paddle_infer_option.model_file = runtime_option.model_file;
+  option.paddle_infer_option.params_file = runtime_option.params_file;
+  option.paddle_infer_option.model_from_memory_ = runtime_option.model_from_memory_;
+  option.paddle_infer_option.device = runtime_option.device;
+  option.paddle_infer_option.device_id = runtime_option.device_id;
+  option.paddle_infer_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
+  option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
+  option.paddle_infer_option.trt_option = runtime_option.trt_option;
+  option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
+  if (option.model_from_memory_) {
+    return InitFromPaddle(option.model_file, option.params_file, option.paddle_infer_option);
+  } else {
+    std::string model_buffer = "";
+    std::string params_buffer = "";
+    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), "Failed to read model file from %s.", option.model_file.c_str());
+    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), "Failed to read parameters file from %s.", option.params_file.c_str());
+    return InitFromPaddle(model_buffer, params_buffer, option.paddle_infer_option);
+  }
+  return false;
+}
+
 bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
                                    const std::string& params_buffer,
                                    const PaddleBackendOption& option) {
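The new PaddleBackend::Init mirrors the simplification of Runtime::CreatePaddleBackend later in this diff: option plumbing moves out of the runtime layer and into the backend. A condensed, hypothetical caller-side sketch (names taken from the runtime hunk below):

    // Hypothetical caller: hand the whole RuntimeOption to the backend and let
    // it populate paddle_infer_option itself.
    std::unique_ptr<BaseBackend> backend = utils::make_unique<PaddleBackend>();
    FDASSERT(backend->Init(runtime_option),
             "Failed to initialized Paddle Inference backend.");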
@@ -54,12 +54,7 @@ class PaddleBackend : public BaseBackend {
  public:
   PaddleBackend() {}
   virtual ~PaddleBackend() = default;
-  void BuildOption(const PaddleBackendOption& option);
-
-  bool InitFromPaddle(const std::string& model_buffer,
-                      const std::string& params_buffer,
-                      const PaddleBackendOption& option = PaddleBackendOption());
-
+  bool Init(const RuntimeOption& option);
 
   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
@@ -77,6 +72,12 @@ class PaddleBackend : public BaseBackend {
   std::vector<TensorInfo> GetOutputInfos() override;
 
  private:
+  void BuildOption(const PaddleBackendOption& option);
+
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
+                      const PaddleBackendOption& option = PaddleBackendOption());
+
   void
   CollectShapeRun(paddle_infer::Predictor* predictor,
                   const std::map<std::string, std::vector<int>>& shape) const;
@@ -30,6 +30,9 @@ struct TrtBackendOption {
   /// `max_workspace_size` for TensorRT
   size_t max_workspace_size = 1 << 30;
 
+  /// Enable log while converting onnx model to tensorrt
+  bool enable_log_info = false;
+
   /*
    * @brief Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
    */
@@ -21,6 +21,7 @@ void BindTrtOption(pybind11::module& m) {
   pybind11::class_<TrtBackendOption>(m, "TrtBackendOption")
       .def(pybind11::init())
       .def_readwrite("enable_fp16", &TrtBackendOption::enable_fp16)
+      .def_readwrite("enable_log_info", &TrtBackendOption::enable_log_info)
       .def_readwrite("max_batch_size", &TrtBackendOption::max_batch_size)
       .def_readwrite("max_workspace_size",
                      &TrtBackendOption::max_workspace_size)
@@ -114,6 +114,13 @@ bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
 }
 
 bool TrtBackend::Init(const RuntimeOption& runtime_option) {
+  auto trt_option = runtime_option.trt_option;
+  trt_option.model_file = runtime_option.model_file;
+  trt_option.params_file = runtime_option.params_file;
+  trt_option.model_format = runtime_option.model_format;
+  trt_option.gpu_id = runtime_option.device_id;
+  trt_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
+  trt_option.external_stream_ = runtime_option.external_stream_;
   if (runtime_option.device != Device::GPU) {
     FDERROR << "TrtBackend only supports Device::GPU, but now it's "
             << runtime_option.device << "." << std::endl;
@@ -130,7 +137,7 @@ bool TrtBackend::Init(const RuntimeOption& runtime_option) {
     if (runtime_option.model_from_memory_) {
       return InitFromPaddle(runtime_option.model_file,
                             runtime_option.params_file,
-                            runtime_option.trt_option);
+                            trt_option);
     } else {
       std::string model_buffer;
       std::string params_buffer;
@@ -141,17 +148,17 @@ bool TrtBackend::Init(const RuntimeOption& runtime_option) {
                "Failed to read parameters file %s.",
                runtime_option.params_file.c_str());
       return InitFromPaddle(model_buffer, params_buffer,
-                            runtime_option.trt_option);
+                            trt_option);
     }
   } else {
     if (runtime_option.model_from_memory_) {
-      return InitFromOnnx(runtime_option.model_file, runtime_option.trt_option);
+      return InitFromOnnx(runtime_option.model_file, trt_option);
     } else {
       std::string model_buffer;
       FDASSERT(ReadBinaryFromFile(runtime_option.model_file, &model_buffer),
                "Failed to read model file %s.",
                runtime_option.model_file.c_str());
-      return InitFromOnnx(model_buffer, runtime_option.trt_option);
+      return InitFromOnnx(model_buffer, trt_option);
     }
   }
   return true;
@@ -525,6 +532,9 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs,
 }
 
 bool TrtBackend::BuildTrtEngine() {
+  if (option_.enable_log_info) {
+    FDTrtLogger::Get()->SetLog(true, true);
+  }
   auto config =
       FDUniquePtr<nvinfer1::IBuilderConfig>(builder_->createBuilderConfig());
   if (!config) {
@@ -220,20 +220,30 @@ class FDTrtLogger : public nvinfer1::ILogger {
     logger = new FDTrtLogger();
     return logger;
   }
+  void SetLog(bool enable_info = false, bool enable_warning = false) {
+    enable_info_ = enable_info;
+    enable_warning_ = enable_warning;
+  }
+
   void log(nvinfer1::ILogger::Severity severity,
            const char* msg) noexcept override {
     if (severity == nvinfer1::ILogger::Severity::kINFO) {
-      // Disable this log
-      // FDINFO << msg << std::endl;
+      if (enable_info_) {
+        FDINFO << msg << std::endl;
+      }
     } else if (severity == nvinfer1::ILogger::Severity::kWARNING) {
-      // Disable this log
-      // FDWARNING << msg << std::endl;
+      if (enable_warning_) {
+        FDWARNING << msg << std::endl;
+      }
     } else if (severity == nvinfer1::ILogger::Severity::kERROR) {
       FDERROR << msg << std::endl;
     } else if (severity == nvinfer1::ILogger::Severity::kINTERNAL_ERROR) {
       FDASSERT(false, "%s", msg);
     }
   }
+
+ private:
+  bool enable_info_ = false;
+  bool enable_warning_ = false;
 };
 
 struct ShapeRangeInfo {
@@ -154,19 +154,10 @@ bool Runtime::Init(const RuntimeOption& _option) {
   } else if (option.backend == Backend::SOPHGOTPU) {
     CreateSophgoNPUBackend();
   } else if (option.backend == Backend::POROS) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-             "Backend::POROS only supports Device::CPU/Device::GPU.");
-    FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
-             "Backend::POROS only supports model format of "
-             "ModelFormat::TORCHSCRIPT.");
-    FDINFO << "Runtime initialized with Backend::POROS in " << option.device
-           << "." << std::endl;
-    return true;
+    CreatePorosBackend();
   } else {
-    FDERROR << "Runtime only support "
-               "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
-               "backend now."
-            << std::endl;
+    std::string msg = Str(GetAvailableBackends());
+    FDERROR << "The compiled FastDeploy only supports " << msg << ", " << option.backend << " is not supported now." << std::endl;
     return false;
   }
   backend_->benchmark_option_ = option.benchmark_option;
@@ -264,43 +255,9 @@ void Runtime::ReleaseModelMemoryBuffer() {
 }
 
 void Runtime::CreatePaddleBackend() {
-  FDASSERT(
-      option.device == Device::CPU || option.device == Device::GPU ||
-          option.device == Device::IPU,
-      "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
-  FDASSERT(
-      option.model_format == ModelFormat::PADDLE,
-      "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
 #ifdef ENABLE_PADDLE_BACKEND
-  option.paddle_infer_option.model_file = option.model_file;
-  option.paddle_infer_option.params_file = option.params_file;
-  option.paddle_infer_option.model_from_memory_ = option.model_from_memory_;
-  option.paddle_infer_option.device = option.device;
-  option.paddle_infer_option.device_id = option.device_id;
-  option.paddle_infer_option.enable_pinned_memory = option.enable_pinned_memory;
-  option.paddle_infer_option.external_stream_ = option.external_stream_;
-  option.paddle_infer_option.trt_option = option.trt_option;
-  option.paddle_infer_option.trt_option.gpu_id = option.device_id;
   backend_ = utils::make_unique<PaddleBackend>();
-  auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
-
-  if (option.model_from_memory_) {
-    FDASSERT(
-        casted_backend->InitFromPaddle(option.model_file, option.params_file,
-                                       option.paddle_infer_option),
-        "Load model from Paddle failed while initliazing PaddleBackend.");
-    ReleaseModelMemoryBuffer();
-  } else {
-    std::string model_buffer = "";
-    std::string params_buffer = "";
-    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-             "Fail to read binary from model file");
-    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
-             "Fail to read binary from parameter file");
-    FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
-                                            option.paddle_infer_option),
-             "Load model from Paddle failed while initliazing PaddleBackend.");
-  }
+  FDASSERT(backend_->Init(option), "Failed to initialized Paddle Inference backend.");
 #else
   FDASSERT(false,
            "PaddleBackend is not available, please compiled with "
@@ -339,12 +296,6 @@ void Runtime::CreateOrtBackend() {
 
 void Runtime::CreateTrtBackend() {
 #ifdef ENABLE_TRT_BACKEND
-  option.trt_option.model_file = option.model_file;
-  option.trt_option.params_file = option.params_file;
-  option.trt_option.model_format = option.model_format;
-  option.trt_option.gpu_id = option.device_id;
-  option.trt_option.enable_pinned_memory = option.enable_pinned_memory;
-  option.trt_option.external_stream_ = option.external_stream_;
   backend_ = utils::make_unique<TrtBackend>();
   FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend.");
 #else