From a711f99c694b08c509c9c14ab32f90596eda06da Mon Sep 17 00:00:00 2001
From: Jason <jiangjiajun@baidu.com>
Date: Thu, 2 Feb 2023 13:10:40 +0800
Subject: [PATCH] [Other] Optimize runtime module (#1211)

* modify ort option

* update code

* Unify backend
---
 fastdeploy/pybind/runtime.cc                  |  4 --
 fastdeploy/runtime/backends/backend.h         |  5 ++
 fastdeploy/runtime/backends/ort/option.h      |  8 +---
 .../runtime/backends/ort/ort_backend.cc       | 48 +++++++++++++++++--
 fastdeploy/runtime/backends/ort/ort_backend.h | 16 ++++---
 fastdeploy/runtime/runtime.cc                 | 46 +-----------------
 fastdeploy/runtime/runtime_option.cc          |  3 +-
 fastdeploy/runtime/runtime_option.h           |  9 +---
 8 files changed, 63 insertions(+), 76 deletions(-)

diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc
index bdd3f61f8..6c5c65bc2 100644
--- a/fastdeploy/pybind/runtime.cc
+++ b/fastdeploy/pybind/runtime.cc
@@ -87,10 +87,6 @@ void BindRuntime(pybind11::module& m) {
       .def_readwrite("cpu_thread_num", &RuntimeOption::cpu_thread_num)
       .def_readwrite("device_id", &RuntimeOption::device_id)
       .def_readwrite("device", &RuntimeOption::device)
-      .def_readwrite("ort_graph_opt_level", &RuntimeOption::ort_graph_opt_level)
-      .def_readwrite("ort_inter_op_num_threads",
-                     &RuntimeOption::ort_inter_op_num_threads)
-      .def_readwrite("ort_execution_mode", &RuntimeOption::ort_execution_mode)
       .def_readwrite("trt_max_shape", &RuntimeOption::trt_max_shape)
       .def_readwrite("trt_opt_shape", &RuntimeOption::trt_opt_shape)
       .def_readwrite("trt_min_shape", &RuntimeOption::trt_min_shape)
diff --git a/fastdeploy/runtime/backends/backend.h b/fastdeploy/runtime/backends/backend.h
index 5fc6b5d7f..88a8e78a0 100644
--- a/fastdeploy/runtime/backends/backend.h
+++ b/fastdeploy/runtime/backends/backend.h
@@ -56,6 +56,11 @@ class BaseBackend {
 
   virtual bool Initialized() const { return initialized_; }
 
+  virtual bool Init(const RuntimeOption& option) {  // base-class fallback
+    FDERROR << "Not Implement Yet." << std::endl;
+    return false;  // reports failure unless a backend overrides Init()
+  }
+
   // Get number of inputs of the model
   virtual int NumInputs() const = 0;
   // Get number of outputs of the model
diff --git a/fastdeploy/runtime/backends/ort/option.h b/fastdeploy/runtime/backends/ort/option.h
index 78f117b99..ca4d3254c 100644
--- a/fastdeploy/runtime/backends/ort/option.h
+++ b/fastdeploy/runtime/backends/ort/option.h
@@ -34,12 +34,8 @@ struct OrtBackendOption {
   // 0: ORT_SEQUENTIAL
   // 1: ORT_PARALLEL
   int execution_mode = -1;
-  bool use_gpu = false;
-  int gpu_id = 0;
+  Device device = Device::CPU;
+  int device_id = 0;
   void* external_stream_ = nullptr;
-
-  // inside parameter, maybe remove next version
-  bool remove_multiclass_nms_ = false;
-  std::map<std::string, std::string> custom_op_info_;
 };
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/backends/ort/ort_backend.cc b/fastdeploy/runtime/backends/ort/ort_backend.cc
index 219cce3cf..70cb18121 100644
--- a/fastdeploy/runtime/backends/ort/ort_backend.cc
+++ b/fastdeploy/runtime/backends/ort/ort_backend.cc
@@ -45,7 +45,7 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
   if (option.execution_mode >= 0) {
     session_options_.SetExecutionMode(ExecutionMode(option.execution_mode));
   }
-  if (option.use_gpu) {
+  if (option.device == Device::GPU) {
     auto all_providers = Ort::GetAvailableProviders();
     bool support_cuda = false;
     std::string providers_msg = "";
@@ -60,10 +60,10 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
                    "support GPU, the available providers are "
                 << providers_msg << "will fallback to CPUExecutionProvider."
                 << std::endl;
-      option_.use_gpu = false;
+      option_.device = Device::CPU;
     } else {
       OrtCUDAProviderOptions cuda_options;
-      cuda_options.device_id = option.gpu_id;
+      cuda_options.device_id = option.device_id;
       if (option.external_stream_) {
         cuda_options.has_user_compute_stream = 1;
         cuda_options.user_compute_stream = option.external_stream_;
@@ -73,6 +73,60 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) {
   }
 }
 
+// Initializes the ONNX Runtime backend from a unified RuntimeOption.
+//
+// Checks that the device is CPU/GPU and the model format is Paddle/ONNX,
+// overlays device, device_id and external_stream_ from `option` onto a copy
+// of option.ort_option, then hands the model to InitFromPaddle() or
+// InitFromOnnx(), reading it from disk first unless model_from_memory_ is
+// set. Returns false after logging with FDERROR when the device or format is
+// unsupported or a model file cannot be read.
+bool OrtBackend::Init(const RuntimeOption& option) {
+  if (option.device != Device::CPU && option.device != Device::GPU) {
+    FDERROR
+        << "Backend::ORT only supports Device::CPU/Device::GPU, but now its "
+        << option.device << "." << std::endl;
+    return false;
+  }
+  const bool is_paddle = option.model_format == ModelFormat::PADDLE;
+  if (!is_paddle && option.model_format != ModelFormat::ONNX) {
+    FDERROR << "Only support Paddle/ONNX model format for OrtBackend."
+            << std::endl;
+    return false;
+  }
+
+  // Start from the ORT-specific settings and overlay the device selection
+  // carried by the top-level option.
+  OrtBackendOption ort_option = option.ort_option;
+  ort_option.device = option.device;
+  ort_option.device_id = option.device_id;
+  ort_option.external_stream_ = option.external_stream_;
+
+  if (option.model_from_memory_) {
+    // In-memory mode: model_file/params_file are passed on as the buffers.
+    return is_paddle ? InitFromPaddle(option.model_file, option.params_file,
+                                      ort_option)
+                     : InitFromOnnx(option.model_file, ort_option);
+  }
+
+  std::string model_buffer;
+  if (!ReadBinaryFromFile(option.model_file, &model_buffer)) {
+    FDERROR << "Failed to read model file: " << option.model_file << std::endl;
+    return false;
+  }
+  if (!is_paddle) {
+    return InitFromOnnx(model_buffer, ort_option);
+  }
+
+  std::string params_buffer;
+  if (!ReadBinaryFromFile(option.params_file, &params_buffer)) {
+    FDERROR << "Failed to read parameters file: " << option.params_file
+            << std::endl;
+    return false;
+  }
+  return InitFromPaddle(model_buffer, params_buffer, ort_option);
+}
+
 bool OrtBackend::InitFromPaddle(const std::string& model_buffer,
                                 const std::string& params_buffer,
                                 const OrtBackendOption& option, bool verbose) {
@@ -221,7 +259,7 @@ bool OrtBackend::Infer(std::vector<FDTensor>& inputs,
 
   // from FDTensor to Ort Inputs
   for (size_t i = 0; i < inputs.size(); ++i) {
-    auto ort_value = CreateOrtValue(inputs[i], option_.use_gpu);
+    auto ort_value = CreateOrtValue(inputs[i], option_.device == Device::GPU);
     binding_->BindInput(inputs[i].name.c_str(), ort_value);
   }
 
@@ -297,7 +335,7 @@ void OrtBackend::InitCustomOperators() {
   if (custom_operators_.size() == 0) {
     MultiClassNmsOp* multiclass_nms = new MultiClassNmsOp{};
     custom_operators_.push_back(multiclass_nms);
-    if (option_.use_gpu) {
+    if (option_.device == Device::GPU) {
       AdaptivePool2dOp* adaptive_pool2d =
           new AdaptivePool2dOp{"CUDAExecutionProvider"};
       custom_operators_.push_back(adaptive_pool2d);
diff --git a/fastdeploy/runtime/backends/ort/ort_backend.h b/fastdeploy/runtime/backends/ort/ort_backend.h
index f3942dc45..61308b9da 100644
--- a/fastdeploy/runtime/backends/ort/ort_backend.h
+++ b/fastdeploy/runtime/backends/ort/ort_backend.h
@@ -39,13 +39,8 @@ class OrtBackend : public BaseBackend {
 
   void BuildOption(const OrtBackendOption& option);
 
-  bool InitFromPaddle(const std::string& model_buffer,
-                      const std::string& params_buffer,
-                      const OrtBackendOption& option = OrtBackendOption(),
-                      bool verbose = false);
-
-  bool InitFromOnnx(const std::string& model_buffer,
-                    const OrtBackendOption& option = OrtBackendOption());
+  // Initialize the backend from a RuntimeOption; overrides BaseBackend::Init.
+  bool Init(const RuntimeOption& option) override;
 
   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
@@ -62,6 +56,14 @@ class OrtBackend : public BaseBackend {
   void InitCustomOperators();
 
  private:
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
+                      const OrtBackendOption& option = OrtBackendOption(),
+                      bool verbose = false);
+
+  bool InitFromOnnx(const std::string& model_buffer,
+                    const OrtBackendOption& option = OrtBackendOption());
+
   Ort::Env env_;
   Ort::Session session_{nullptr};
   Ort::SessionOptions session_options_;
diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc
index e5df860e7..f0347805d 100644
--- a/fastdeploy/runtime/runtime.cc
+++ b/fastdeploy/runtime/runtime.cc
@@ -341,53 +341,14 @@ void Runtime::CreateOpenVINOBackend() {
 }
 
 void Runtime::CreateOrtBackend() {
-  FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-           "Backend::ORT only supports Device::CPU/Device::GPU.");
-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "OrtBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
 #ifdef ENABLE_ORT_BACKEND
-  auto ort_option = OrtBackendOption();
-  ort_option.graph_optimization_level = option.ort_graph_opt_level;
-  ort_option.intra_op_num_threads = option.cpu_thread_num;
-  ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
-  ort_option.execution_mode = option.ort_execution_mode;
-  ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
-  ort_option.gpu_id = option.device_id;
-  ort_option.external_stream_ = option.external_stream_;
   backend_ = utils::make_unique<OrtBackend>();
-  auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
-  if (option.model_format == ModelFormat::ONNX) {
-    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
-               "Load model from ONNX failed while initliazing OrtBackend.");
-      ReleaseModelMemoryBuffer();
-    } else {
-      std::string model_buffer = "";
-      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-               "Fail to read binary from model file");
-      FDASSERT(casted_backend->InitFromOnnx(model_buffer, ort_option),
-               "Load model from ONNX failed while initliazing OrtBackend.");
-    }
-  } else {
-    if (option.model_from_memory_) {
-      FDASSERT(casted_backend->InitFromPaddle(option.model_file,
-                                              option.params_file, ort_option),
-               "Load model from Paddle failed while initliazing OrtBackend.");
-      ReleaseModelMemoryBuffer();
-    } else {
-      std::string model_buffer = "";
-      std::string params_buffer = "";
-      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-               "Fail to read binary from model file");
-      FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
-               "Fail to read binary from parameter file");
-      FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
-                                              ort_option),
-               "Load model from Paddle failed while initliazing OrtBackend.");
-    }
-  }
+  FDASSERT(backend_->Init(option), "Failed to initialize Backend::ORT.");
+  // As before this refactor, call ReleaseModelMemoryBuffer() once the backend
+  // has been initialized from an in-memory model.
+  if (option.model_from_memory_) {
+    ReleaseModelMemoryBuffer();
+  }
 #else
   FDASSERT(false,
            "OrtBackend is not available, please compiled with "
diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc
index 29e94c1dd..0798786b9 100644
--- a/fastdeploy/runtime/runtime_option.cc
+++ b/fastdeploy/runtime/runtime_option.cc
@@ -97,6 +97,7 @@ void RuntimeOption::SetCpuThreadNum(int thread_num) {
   FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
   cpu_thread_num = thread_num;
   paddle_lite_option.threads = thread_num;
+  ort_option.intra_op_num_threads = thread_num;
 }
 
 void RuntimeOption::SetOrtGraphOptLevel(int level) {
@@ -104,7 +105,7 @@ void RuntimeOption::SetOrtGraphOptLevel(int level) {
   auto valid_level = std::find(supported_level.begin(), supported_level.end(),
                                level) != supported_level.end();
   FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
-  ort_graph_opt_level = level;
+  ort_option.graph_optimization_level = level;
 }
 
 // use paddle inference backend
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
index 8681b0c9c..3aea02e0d 100644
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -360,14 +360,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
 
   bool enable_pinned_memory = false;
 
-  // ======Only for ORT Backend========
-  // -1 means use default value by ort
-  // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
-  // ORT_ENABLE_ALL
-  int ort_graph_opt_level = -1;
-  int ort_inter_op_num_threads = -1;
-  // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
-  int ort_execution_mode = -1;
+  OrtBackendOption ort_option;
 
   // ======Only for Paddle Backend=====
   bool pd_enable_mkldnn = true;