[Other] Optimize runtime module (#1211)

* modify ort option
* update code
* Unify backend
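In short, this commit replaces the flat ort_* fields on RuntimeOption with an
embedded OrtBackendOption (ort_option), swaps the ORT-specific use_gpu/gpu_id
pair for the generic Device device / int device_id, gives BaseBackend a
virtual Init(const RuntimeOption&), and collapses Runtime::CreateOrtBackend to
a single backend_->Init(option) call. A minimal user-side sketch of the
resulting API (the model path and the SetModelPath/UseOrtBackend calls are
illustrative assumptions, not part of this diff):

    #include "fastdeploy/runtime.h"  // assumed include path

    int main() {
      fastdeploy::RuntimeOption option;
      option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);
      option.UseOrtBackend();
      option.SetCpuThreadNum(8);             // now also fills ort_option.intra_op_num_threads
      option.ort_option.execution_mode = 1;  // ORT knobs live on the embedded option
      fastdeploy::Runtime runtime;
      runtime.Init(option);  // Runtime hands the whole option to OrtBackend::Init
      return 0;
    }
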
@@ -87,10 +87,6 @@ void BindRuntime(pybind11::module& m) {
       .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
       .def_readwrite("device_id", &RuntimeOption::device_id)
       .def_readwrite("device", &RuntimeOption::device)
-      .def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level)
-      .def_readwrite("ort_inter_op_num_threads",
-                     &RuntimeOption::ort_inter_op_num_threads)
-      .def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
       .def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
       .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
       .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)

@@ -56,6 +56,11 @@ class BaseBackend {

   virtual bool Initialized() const { return initialized_; }

+  virtual bool Init(const RuntimeOption& option) {
+    FDERROR << "Not Implement Yet." << std::endl;
+    return false;
+  }
+
   // Get number of inputs of the model
   virtual int NumInputs() const = 0;
   // Get number of outputs of the model

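The default implementation above gives every backend a shared entry point
without forcing an immediate migration; a backend that has been unified
overrides it, as OrtBackend does below. A sketch of the override contract,
using a hypothetical MyBackend (not part of this diff):

    // Hypothetical CPU-only backend illustrating the Init contract:
    // validate the device, then build from the fields of RuntimeOption.
    class MyBackend : public BaseBackend {
     public:
      bool Init(const RuntimeOption& option) override {
        if (option.device != Device::CPU) {
          FDERROR << "MyBackend only supports Device::CPU." << std::endl;
          return false;
        }
        // ... load option.model_file according to option.model_format ...
        initialized_ = true;
        return true;
      }
    };
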
@@ -34,12 +34,8 @@ struct OrtBackendOption {
   // 0: ORT_SEQUENTIAL
   // 1: ORT_PARALLEL
   int execution_mode = -1;
-  bool use_gpu = false;
-  int gpu_id = 0;
+  Device device = Device::CPU;
+  int device_id = 0;
   void* external_stream_ = nullptr;
-
-  // inside parameter, maybe remove next version
-  bool remove_multiclass_nms_ = false;
-  std::map<std::string, std::string> custom_op_info_;
 };
 }  // namespace fastdeploy

@@ -45,7 +45,7 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
   if (option.execution_mode >= 0) {
     session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
   }
-  if (option.use_gpu) {
+  if (option.device == Device::GPU) {
     auto all_providers = Ort::GetAvailableProviders();
     bool support_cuda = false;
     std::string providers_msg = "";

@@ -60,10 +60,10 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
                    "support GPU, the available providers are "
                 << providers_msg << "will fallback to CPUExecutionProvider."
                 << std::endl;
-      option_.use_gpu = false;
+      option_.device = Device::CPU;
     } else {
       OrtCUDAProviderOptions cuda_options;
-      cuda_options.device_id = option.gpu_id;
+      cuda_options.device_id = option.device_id;
       if (option.external_stream_) {
         cuda_options.has_user_compute_stream = 1;
         cuda_options.user_compute_stream = option.external_stream_;

@@ -73,6 +73,44 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
   }
 }

+bool OrtBackend::Init(const RuntimeOption& option) {
+  if (option.device != Device::CPU && option.device != Device::GPU) {
+    FDERROR
+        << "Backend::ORT only supports Device::CPU/Device::GPU, but now its "
+        << option.device << "." << std::endl;
+    return false;
+  }
+  OrtBackendOption ort_option = option.ort_option;
+  ort_option.device = option.device;
+  ort_option.device_id = option.device_id;
+  ort_option.external_stream_ = option.external_stream_;
+
+  if (option.model_format == ModelFormat::PADDLE) {
+    if (option.model_from_memory_) {
+      return InitFromPaddle(option.model_file, option.params_file, ort_option);
+    }
+    std::string model_buffer, params_buffer;
+    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+             "Failed to read model file.");
+    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
+             "Failed to read parameters file.");
+    return InitFromPaddle(model_buffer, params_buffer, ort_option);
+  } else if (option.model_format == ModelFormat::ONNX) {
+    if (option.model_from_memory_) {
+      return InitFromOnnx(option.model_file, ort_option);
+    }
+    std::string model_buffer;
+    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+             "Failed to read model file.");
+    return InitFromOnnx(model_buffer, ort_option);
+  } else {
+    FDERROR << "Only support Paddle/ONNX model format for OrtBackend."
+            << std::endl;
+    return false;
+  }
+  return false;
+}
+
 bool OrtBackend::InitFromPaddle(const std::string& model_buffer,
                                 const std::string& params_buffer,
                                 const OrtBackendOption& option, bool verbose) {

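Two details worth noting in OrtBackend::Init: model_file doubles as the
serialized model buffer when model_from_memory_ is set, so the in-memory and
on-disk paths converge on the same InitFromPaddle/InitFromOnnx calls, and the
trailing return false is unreachable since every branch of the if/else chain
already returns. Driving the backend directly (normally Runtime does this)
would look roughly like the following sketch; the path is illustrative:

    fastdeploy::RuntimeOption option;
    option.model_file = "model.onnx";  // assumed public field, as read in Init above
    option.model_format = fastdeploy::ModelFormat::ONNX;
    auto backend = fastdeploy::utils::make_unique<fastdeploy::OrtBackend>();
    if (!backend->Init(option)) {
      FDERROR << "Failed to initialize Backend::ORT." << std::endl;
    }
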
@@ -221,7 +259,7 @@ bool OrtBackend::Infer(std::vector<FDTensor>& inputs,

   // from FDTensor to Ort Inputs
   for (size_t i = 0; i < inputs.size(); ++i) {
-    auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu);
+    auto ort_value = CreateOrtValue(inputs[i], option_.device == Device::GPU);
     binding_->BindInput(inputs[i].name.c_str(), ort_value);
   }

@@ -297,7 +335,7 @@ void OrtBackend::InitCustomOperators() {
   if (custom_operators_.size() == 0) {
     MultiClassNmsOp* multiclass_nms = new MultiClassNmsOp{};
     custom_operators_.push_back(multiclass_nms);
-    if (option_.use_gpu) {
+    if (option_.device == Device::GPU) {
       AdaptivePool2dOp* adaptive_pool2d =
           new AdaptivePool2dOp{"CUDAExecutionProvider"};
       custom_operators_.push_back(adaptive_pool2d);

@@ -39,13 +39,7 @@ class OrtBackend : public BaseBackend {

   void BuildOption(const OrtBackendOption& option);

-  bool InitFromPaddle(const std::string& model_buffer,
-                      const std::string& params_buffer,
-                      const OrtBackendOption& option = OrtBackendOption(),
-                      bool verbose = false);
-
-  bool InitFromOnnx(const std::string& model_buffer,
-                    const OrtBackendOption& option = OrtBackendOption());
+  bool Init(const RuntimeOption& option);

   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;

@@ -62,6 +56,14 @@ class OrtBackend : public BaseBackend {
   void InitCustomOperators();

  private:
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
+                      const OrtBackendOption& option = OrtBackendOption(),
+                      bool verbose = false);
+
+  bool InitFromOnnx(const std::string& model_buffer,
+                    const OrtBackendOption& option = OrtBackendOption());
+
   Ort::Env env_;
   Ort::Session session_{nullptr};
   Ort::SessionOptions session_options_;

@@ -341,53 +341,9 @@ void Runtime::CreateOpenVINOBackend() {
 }

 void Runtime::CreateOrtBackend() {
-  FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-           "Backend::ORT only supports Device::CPU/Device::GPU.");
-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "OrtBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
 #ifdef ENABLE_ORT_BACKEND
-  auto ort_option = OrtBackendOption();
-  ort_option.graph_optimization_level = option.ort_graph_opt_level;
-  ort_option.intra_op_num_threads = option.cpu_thread_num;
-  ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
-  ort_option.execution_mode = option.ort_execution_mode;
-  ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
-  ort_option.gpu_id = option.device_id;
-  ort_option.external_stream_ = option.external_stream_;
   backend_ = utils::make_unique<OrtBackend>();
-  auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
-  if (option.model_format == ModelFormat::ONNX) {
-    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
-               "Load model from ONNX failed while initliazing OrtBackend.");
-      ReleaseModelMemoryBuffer();
-    } else {
-      std::string model_buffer = "";
-      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-               "Fail to read binary from model file");
-      FDASSERT(casted_backend->InitFromOnnx(model_buffer, ort_option),
-               "Load model from ONNX failed while initliazing OrtBackend.");
-    }
-  } else {
-    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                                              option.params_file, ort_option),
-               "Load model from Paddle failed while initliazing OrtBackend.");
-      ReleaseModelMemoryBuffer();
-    } else {
-      std::string model_buffer = "";
-      std::string params_buffer = "";
-      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-               "Fail to read binary from model file");
-      FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
-               "Fail to read binary from parameter file");
-      FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
-                                              ort_option),
-               "Load model from Paddle failed while initliazing OrtBackend.");
-    }
-  }
+  FDASSERT(backend_->Init(option), "Failed to initialize Backend::ORT.");
 #else
   FDASSERT(false,
            "OrtBackend is not available, please compiled with "

@@ -97,6 +97,7 @@ void RuntimeOption::SetCpuThreadNum(int thread_num) {
   FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
   cpu_thread_num = thread_num;
   paddle_lite_option.threads = thread_num;
+  ort_option.intra_op_num_threads = thread_num;
 }

 void RuntimeOption::SetOrtGraphOptLevel(int level) {

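Because ort_option now lives inside RuntimeOption, SetCpuThreadNum can keep
the per-backend thread counts in sync in one place; field names below follow
the hunks above:

    fastdeploy::RuntimeOption option;
    option.SetCpuThreadNum(8);
    // After the call:
    //   option.cpu_thread_num == 8
    //   option.paddle_lite_option.threads == 8
    //   option.ort_option.intra_op_num_threads == 8
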
@@ -104,7 +105,7 @@ void RuntimeOption::SetOrtGraphOptLevel(int level) {
   auto valid_level = std::find(supported_level.begin(), supported_level.end(),
                                level) != supported_level.end();
   FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
-  ort_graph_opt_level = level;
+  ort_option.graph_optimization_level = level;
 }

 // use paddle inference backend

@@ -360,14 +360,7 @@ struct FASTDEPLOY_DECL RuntimeOption {

   bool enable_pinned_memory = false;

-  // ======Only for ORT Backend========
-  // -1 means use default value by ort
-  // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
-  // ORT_ENABLE_ALL
-  int ort_graph_opt_level = -1;
-  int ort_inter_op_num_threads = -1;
-  // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
-  int ort_execution_mode = -1;
+  OrtBackendOption ort_option;

   // ======Only for Paddle Backend=====
   bool pd_enable_mkldnn = true;
