Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-07 01:22:59 +08:00
[Other] Optimize runtime module (#1195)
* Optimize code
* optimize code
* fix bug
@@ -55,17 +55,27 @@ class BaseBackend {
   virtual bool Initialized() const { return initialized_; }

+  // Get number of inputs of the model
   virtual int NumInputs() const = 0;
+  // Get number of outputs of the model
   virtual int NumOutputs() const = 0;
+  // Get information of input tensor
   virtual TensorInfo GetInputInfo(int index) = 0;
+  // Get information of output tensor
   virtual TensorInfo GetOutputInfo(int index) = 0;
+  // Get information of all the input tensors
   virtual std::vector<TensorInfo> GetInputInfos() = 0;
+  // Get information of all the output tensors
   virtual std::vector<TensorInfo> GetOutputInfos() = 0;

   // if copy_to_fd is true, copy memory data to FDTensor
   // else share memory to FDTensor(only Paddle、ORT、TRT、OpenVINO support it)
   virtual bool Infer(std::vector<FDTensor>& inputs,
                      std::vector<FDTensor>* outputs,
                      bool copy_to_fd = true) = 0;

+  // Optional: For those backends which can share memory
+  // while creating multiple inference engines with same model file
   virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
                                              int device_id = -1) {
     FDERROR << "Clone no support" << std::endl;
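For orientation, here is a minimal sketch of what a concrete backend supplies under this interface. DummyBackend is hypothetical, the include path is an assumption, and only the members visible in this hunk are implemented; a real backend may have further obligations.

#include <vector>
#include "fastdeploy/backends/backend.h"  // assumed include path

// Hypothetical pass-through backend: one input, one output, data copied back.
class DummyBackend : public fastdeploy::BaseBackend {
 public:
  int NumInputs() const override { return 1; }
  int NumOutputs() const override { return 1; }
  fastdeploy::TensorInfo GetInputInfo(int index) override { return info_; }
  fastdeploy::TensorInfo GetOutputInfo(int index) override { return info_; }
  std::vector<fastdeploy::TensorInfo> GetInputInfos() override { return {info_}; }
  std::vector<fastdeploy::TensorInfo> GetOutputInfos() override { return {info_}; }
  bool Infer(std::vector<fastdeploy::FDTensor>& inputs,
             std::vector<fastdeploy::FDTensor>* outputs,
             bool copy_to_fd) override {
    // copy_to_fd == false is only honoured by backends that can share memory
    // (Paddle/ORT/TRT/OpenVINO); a simple backend always hands back a copy.
    *outputs = inputs;
    return true;
  }
  // Clone() is not overridden, so the inherited default logs "Clone no support".
 private:
  fastdeploy::TensorInfo info_;
};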
@@ -78,8 +78,9 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
     out << "ModelFormat::SOPHGO";
   } else if (format == ModelFormat::TORCHSCRIPT) {
     out << "ModelFormat::TORCHSCRIPT";
-  }
+  } else {
     out << "UNKNOWN-ModelFormat";
+  }
   return out;
 }

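This hunk is the "fix bug" item from the commit message: without the else branch, "UNKNOWN-ModelFormat" was streamed after every branch of the chain, so a known format printed its own label followed by the UNKNOWN suffix. A quick check of the fixed operator; the include path and the out-of-range cast are only illustrative.

#include <iostream>
#include "fastdeploy/runtime.h"  // assumed include path

int main() {
  // Exactly one label is streamed per call after the fix.
  std::cout << fastdeploy::ModelFormat::ONNX << "\n";             // ModelFormat::ONNX
  std::cout << static_cast<fastdeploy::ModelFormat>(99) << "\n";  // UNKNOWN-ModelFormat
  return 0;
}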
@@ -72,7 +72,8 @@ enum ModelFormat {
 };

 /// Describle all the supported backends for specified model format
-static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
+static std::map<ModelFormat, std::vector<Backend>>
+    s_default_backends_by_format = {
     {ModelFormat::PADDLE, {Backend::PDINFER, Backend::LITE,
                            Backend::ORT, Backend::OPENVINO, Backend::TRT}},
     {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}},
@@ -81,8 +82,22 @@ static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
     {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
 };

+/// Describle all the supported backends for specified device
+static std::map<Device, std::vector<Backend>>
+    s_default_backends_by_device = {
+    {Device::CPU, {Backend::LITE, Backend::PDINFER, Backend::ORT,
+                   Backend::OPENVINO, Backend::POROS}},
+    {Device::GPU, {Backend::PDINFER, Backend::ORT, Backend::TRT, Backend::POROS}},
+    {Device::RKNPU, {Backend::RKNPU2}},
+    {Device::IPU, {Backend::PDINFER}},
+    {Device::TIMVX, {Backend::LITE}},
+    {Device::KUNLUNXIN, {Backend::LITE}},
+    {Device::ASCEND, {Backend::LITE}},
+    {Device::SOPHGOTPUD, {Backend::SOPHGOTPU}}
+};
+
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Backend& b);
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Device& d);
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const ModelFormat& f);

 }  // namespace fastdeploy
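The commit splits the single format-keyed table into two tables, so a default backend must now be valid for both the model format and the device. A self-contained sketch of the intersection that the new AutoSelectBackend performs; the tables are reduced copies and the enums are trimmed for brevity.

#include <iostream>
#include <map>
#include <vector>

enum class Backend { ORT, OPENVINO, TRT, PDINFER };
enum class ModelFormat { PADDLE, ONNX };
enum class Device { CPU, GPU };

// Reduced copies of the two lookup tables from the diff.
static std::map<ModelFormat, std::vector<Backend>> by_format = {
    {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}}};
static std::map<Device, std::vector<Backend>> by_device = {
    {Device::GPU, {Backend::PDINFER, Backend::ORT, Backend::TRT}}};

int main() {
  // Candidates = backends supported by BOTH the model format and the device,
  // kept in the format table's priority order.
  std::vector<Backend> candidates;
  for (auto b0 : by_format[ModelFormat::ONNX])
    for (auto b1 : by_device[Device::GPU])
      if (b0 == b1) candidates.push_back(b0);
  std::cout << candidates.size() << " candidates\n";  // 2 (ORT, TRT)
  return 0;
}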
@@ -51,55 +51,82 @@

 namespace fastdeploy {

-bool Runtime::Init(const RuntimeOption& _option) {
-  option = _option;
-  // Choose default backend by model format
-  if (option.backend == Backend::UNKNOWN) {
-    auto iter = s_default_backends_cfg.find(option.model_format);
-    if (iter == s_default_backends_cfg.end()) {
+bool AutoSelectBackend(RuntimeOption& option) {
+  auto iter0 = s_default_backends_by_format.find(option.model_format);
+  if (iter0 == s_default_backends_by_format.end()) {
     FDERROR << "Cannot found a default backend for model format: "
             << option.model_format
             << ", please define the inference backend in RuntimeOption."
             << std::endl;
     return false;
   }
-  for (const auto& b : iter->second) {
+
+  auto iter1 = s_default_backends_by_device.find(option.device);
+  if (iter1 == s_default_backends_by_device.end()) {
+    FDERROR << "Cannot found a default backend for device: " << option.device
+            << ", please define the inference backend in RuntimeOption."
+            << std::endl;
+    return false;
+  }
+
+  std::vector<Backend> candidates;
+  for (const auto& b0 : iter0->second) {
+    for (const auto& b1 : iter1->second) {
+      if (b0 == b1) {
+        candidates.push_back(b0);
+      }
+    }
+  }
+
+  if (candidates.size() == 0) {
+    FDERROR << "Cannot found availabel inference backends by model format: "
+            << option.model_format << " with device: " << option.device
+            << std::endl;
+    return false;
+  }
+
+  for (const auto& b : candidates) {
     if (IsBackendAvailable(b)) {
       option.backend = b;
       FDINFO << "FastDeploy will choose " << b << " to inference this model."
              << std::endl;
+      return true;
     }
   }
+
+  std::string debug_message = Str(candidates);
+  FDERROR << "The candiate backends for " << option.model_format << " & "
+          << option.device << " are " << debug_message
+          << ", but both of them have not been compiled with current "
+             "FastDeploy yet."
+          << std::endl;
+  return false;
+}
+
+bool Runtime::Init(const RuntimeOption& _option) {
+  option = _option;
+
+  // Choose default backend by model format and device if backend is not
+  // specified
   if (option.backend == Backend::UNKNOWN) {
-    FDERROR << "Cannot found available backends for model format: "
-            << option.model_format << "." << std::endl;
-    return false;
+    if (!AutoSelectBackend(option)) {
+      return false;
     }
   }

   if (option.backend == Backend::ORT) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-             "Backend::ORT only supports Device::CPU/Device::GPU.");
     CreateOrtBackend();
-    FDINFO << "Runtime initialized with Backend::ORT in " << option.device
-           << "." << std::endl;
   } else if (option.backend == Backend::TRT) {
-    FDASSERT(option.device == Device::GPU,
-             "Backend::TRT only supports Device::GPU.");
     CreateTrtBackend();
-    FDINFO << "Runtime initialized with Backend::TRT in " << option.device
-           << "." << std::endl;
   } else if (option.backend == Backend::PDINFER) {
-    FDASSERT(
-        option.device == Device::CPU || option.device == Device::GPU ||
-            option.device == Device::IPU,
-        "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
-    FDASSERT(
-        option.model_format == ModelFormat::PADDLE,
-        "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
     CreatePaddleBackend();
-    FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
-           << "." << std::endl;
+  } else if (option.backend == Backend::OPENVINO) {
+    CreateOpenVINOBackend();
+  } else if (option.backend == Backend::LITE) {
+    CreateLiteBackend();
+  } else if (option.backend == Backend::RKNPU2) {
+    CreateRKNPU2Backend();
+  } else if (option.backend == Backend::SOPHGOTPU) {
+    CreateSophgoNPUBackend();
   } else if (option.backend == Backend::POROS) {
     FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
              "Backend::POROS only supports Device::CPU/Device::GPU.");
@@ -109,35 +136,6 @@ bool Runtime::Init(const RuntimeOption& _option) {
     FDINFO << "Runtime initialized with Backend::POROS in " << option.device
            << "." << std::endl;
     return true;
-  } else if (option.backend == Backend::OPENVINO) {
-    FDASSERT(option.device == Device::CPU,
-             "Backend::OPENVINO only supports Device::CPU");
-    CreateOpenVINOBackend();
-    FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
-           << "." << std::endl;
-  } else if (option.backend == Backend::LITE) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
-                 option.device == Device::KUNLUNXIN ||
-                 option.device == Device::ASCEND,
-             "Backend::LITE only supports "
-             "Device::CPU/Device::TIMVX/Device::KUNLUNXIN.");
-    CreateLiteBackend();
-    FDINFO << "Runtime initialized with Backend::LITE in " << option.device
-           << "." << std::endl;
-  } else if (option.backend == Backend::RKNPU2) {
-    FDASSERT(option.device == Device::RKNPU,
-             "Backend::RKNPU2 only supports Device::RKNPU2");
-    CreateRKNPU2Backend();
-
-    FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
-           << "." << std::endl;
-  } else if (option.backend == Backend::SOPHGOTPU) {
-    FDASSERT(option.device == Device::SOPHGOTPUD,
-             "Backend::SOPHGO only supports Device::SOPHGO");
-    CreateSophgoNPUBackend();
-
-    FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
-           << "." << std::endl;
   } else {
     FDERROR << "Runtime only support "
                "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
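With the refactor, a caller that leaves the backend at Backend::UNKNOWN gets automatic selection by format and device. A usage sketch; the model path is a placeholder, and the SetModelPath/UseGpu setters are assumed to exist on RuntimeOption as in FastDeploy releases of this era.

#include "fastdeploy/runtime.h"  // assumed include path

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);
  option.UseGpu(0);
  // option.backend stays Backend::UNKNOWN, so Init() calls AutoSelectBackend(),
  // intersects the format and device tables, and picks the first available hit.
  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return -1;
  return 0;
}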
@@ -211,6 +209,13 @@ FDTensor* Runtime::GetOutputTensor(const std::string& name) {
 }

 void Runtime::CreatePaddleBackend() {
+  FDASSERT(
+      option.device == Device::CPU || option.device == Device::GPU ||
+          option.device == Device::IPU,
+      "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
+  FDASSERT(
+      option.model_format == ModelFormat::PADDLE,
+      "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
 #ifdef ENABLE_PADDLE_BACKEND
   auto pd_option = PaddleBackendOption();
   pd_option.model_file = option.model_file;
@@ -265,8 +270,6 @@ void Runtime::CreatePaddleBackend() {
     pd_option.ipu_option = ipu_option;
   }
 #endif
-  FDASSERT(option.model_format == ModelFormat::PADDLE,
-           "PaddleBackend only support model format of ModelFormat::PADDLE.");
   backend_ = utils::make_unique<PaddleBackend>();
   auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
   if (pd_option.model_from_memory_) {
@@ -283,9 +286,17 @@ void Runtime::CreatePaddleBackend() {
            "PaddleBackend is not available, please compiled with "
            "ENABLE_PADDLE_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
+         << "." << std::endl;
 }

 void Runtime::CreateOpenVINOBackend() {
+  FDASSERT(option.device == Device::CPU,
+           "Backend::OPENVINO only supports Device::CPU");
+  FDASSERT(option.model_format == ModelFormat::PADDLE ||
+               option.model_format == ModelFormat::ONNX,
+           "OpenVINOBackend only support model format of ModelFormat::PADDLE / "
+           "ModelFormat::ONNX.");
 #ifdef ENABLE_OPENVINO_BACKEND
   auto ov_option = OpenVINOBackendOption();
   ov_option.cpu_thread_num = option.cpu_thread_num;
@@ -295,10 +306,6 @@ void Runtime::CreateOpenVINOBackend() {
   for (const auto& op : option.ov_cpu_operators) {
     ov_option.cpu_operators.insert(op);
   }
-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "OpenVINOBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
   backend_ = utils::make_unique<OpenVINOBackend>();
   auto casted_backend = dynamic_cast<OpenVINOBackend*>(backend_.get());

@@ -315,9 +322,17 @@ void Runtime::CreateOpenVINOBackend() {
            "OpenVINOBackend is not available, please compiled with "
            "ENABLE_OPENVINO_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
+         << "." << std::endl;
 }

 void Runtime::CreateOrtBackend() {
+  FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
+           "Backend::ORT only supports Device::CPU/Device::GPU.");
+  FDASSERT(option.model_format == ModelFormat::PADDLE ||
+               option.model_format == ModelFormat::ONNX,
+           "OrtBackend only support model format of ModelFormat::PADDLE / "
+           "ModelFormat::ONNX.");
 #ifdef ENABLE_ORT_BACKEND
   auto ort_option = OrtBackendOption();
   ort_option.graph_optimization_level = option.ort_graph_opt_level;
@@ -328,10 +343,6 @@ void Runtime::CreateOrtBackend() {
   ort_option.gpu_id = option.device_id;
   ort_option.external_stream_ = option.external_stream_;

-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "OrtBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
   backend_ = utils::make_unique<OrtBackend>();
   auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
   if (option.model_format == ModelFormat::ONNX) {
@@ -347,9 +358,17 @@ void Runtime::CreateOrtBackend() {
            "OrtBackend is not available, please compiled with "
            "ENABLE_ORT_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::ORT in " << option.device << "."
+         << std::endl;
 }

 void Runtime::CreateTrtBackend() {
+  FDASSERT(option.device == Device::GPU,
+           "Backend::TRT only supports Device::GPU.");
+  FDASSERT(option.model_format == ModelFormat::PADDLE ||
+               option.model_format == ModelFormat::ONNX,
+           "TrtBackend only support model format of ModelFormat::PADDLE / "
+           "ModelFormat::ONNX.");
 #ifdef ENABLE_TRT_BACKEND
   auto trt_option = TrtBackendOption();
   trt_option.model_file = option.model_file;
@@ -367,10 +386,6 @@ void Runtime::CreateTrtBackend() {
   trt_option.enable_pinned_memory = option.enable_pinned_memory;
   trt_option.external_stream_ = option.external_stream_;

-  FDASSERT(option.model_format == ModelFormat::PADDLE ||
-               option.model_format == ModelFormat::ONNX,
-           "TrtBackend only support model format of ModelFormat::PADDLE / "
-           "ModelFormat::ONNX.");
   backend_ = utils::make_unique<TrtBackend>();
   auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
   if (option.model_format == ModelFormat::ONNX) {
@@ -386,12 +401,19 @@ void Runtime::CreateTrtBackend() {
            "TrtBackend is not available, please compiled with "
            "ENABLE_TRT_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::TRT in " << option.device << "."
+         << std::endl;
 }

 void Runtime::CreateLiteBackend() {
-#ifdef ENABLE_LITE_BACKEND
+  FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
+               option.device == Device::KUNLUNXIN ||
+               option.device == Device::ASCEND,
+           "Backend::LITE only supports "
+           "Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
   FDASSERT(option.model_format == ModelFormat::PADDLE,
            "LiteBackend only support model format of ModelFormat::PADDLE");
+#ifdef ENABLE_LITE_BACKEND
   backend_ = utils::make_unique<LiteBackend>();
   auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
   FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
@@ -402,15 +424,19 @@ void Runtime::CreateLiteBackend() {
            "LiteBackend is not available, please compiled with "
            "ENABLE_LITE_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::LITE in " << option.device << "."
+         << std::endl;
 }

 void Runtime::CreateRKNPU2Backend() {
+  FDASSERT(option.device == Device::RKNPU,
+           "Backend::RKNPU2 only supports Device::RKNPU2");
+  FDASSERT(option.model_format == ModelFormat::RKNN,
+           "RKNPU2Backend only support model format of ModelFormat::RKNN");
 #ifdef ENABLE_RKNPU2_BACKEND
   auto rknpu2_option = RKNPU2BackendOption();
   rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
   rknpu2_option.core_mask = option.rknpu2_core_mask_;
-  FDASSERT(option.model_format == ModelFormat::RKNN,
-           "RKNPU2Backend only support model format of ModelFormat::RKNN");
   backend_ = utils::make_unique<RKNPU2Backend>();
   auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
   FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
@@ -420,13 +446,17 @@ void Runtime::CreateRKNPU2Backend() {
            "RKNPU2Backend is not available, please compiled with "
            "ENABLE_RKNPU2_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
+         << "." << std::endl;
 }

 void Runtime::CreateSophgoNPUBackend() {
-#ifdef ENABLE_SOPHGO_BACKEND
-  auto sophgo_option = SophgoBackendOption();
+  FDASSERT(option.device == Device::SOPHGOTPUD,
+           "Backend::SOPHGO only supports Device::SOPHGO");
   FDASSERT(option.model_format == ModelFormat::SOPHGO,
            "SophgoBackend only support model format of ModelFormat::SOPHGO");
+#ifdef ENABLE_SOPHGO_BACKEND
+  auto sophgo_option = SophgoBackendOption();
   backend_ = utils::make_unique<SophgoBackend>();
   auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get());
   FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option),
@@ -436,6 +466,8 @@ void Runtime::CreateSophgoNPUBackend() {
            "SophgoBackend is not available, please compiled with "
            "ENABLE_SOPHGO_BACKEND=ON.");
 #endif
+  FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device
+         << "." << std::endl;
 }

 Runtime* Runtime::Clone(void* stream, int device_id) {
@@ -458,4 +490,36 @@ Runtime* Runtime::Clone(void* stream, int device_id) {
   return runtime;
 }

+// only for poros backend
+bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
+                      const RuntimeOption& _option) {
+#ifdef ENABLE_POROS_BACKEND
+  option = _option;
+  auto poros_option = PorosBackendOption();
+  poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
+  poros_option.gpu_id = option.device_id;
+  poros_option.long_to_int = option.long_to_int;
+  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
+  poros_option.unconst_ops_thres = option.unconst_ops_thres;
+  poros_option.poros_file = option.poros_file;
+  poros_option.is_dynamic = option.is_dynamic;
+  poros_option.enable_fp16 = option.trt_enable_fp16;
+  poros_option.max_batch_size = option.trt_max_batch_size;
+  poros_option.max_workspace_size = option.trt_max_workspace_size;
+  FDASSERT(
+      option.model_format == ModelFormat::TORCHSCRIPT,
+      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
+  backend_ = utils::make_unique<PorosBackend>();
+  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
+  FDASSERT(
+      casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
+      "Load model from Torchscript failed while initliazing PorosBackend.");
+#else
+  FDASSERT(false,
+           "PorosBackend is not available, please compiled with "
+           "ENABLE_POROS_BACKEND=ON.");
+#endif
+  return true;
+}
+
 }  // namespace fastdeploy
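Runtime::Compile is unchanged in behaviour here, just relocated to the end of the file. A sketch of how the Poros path is driven; the model path is a placeholder, and UsePorosBackend() plus the two-argument FDTensor::Resize are assumptions about the FastDeploy API of this era rather than something shown in the diff.

#include <vector>
#include "fastdeploy/runtime.h"  // assumed include path

int main() {
  fastdeploy::RuntimeOption option;
  option.UseGpu(0);
  option.UsePorosBackend();  // assumed setter
  option.model_file = "model.pt";
  option.model_format = fastdeploy::ModelFormat::TORCHSCRIPT;

  // One prewarm batch: a single FP32 input of shape [1, 3, 224, 224].
  std::vector<std::vector<fastdeploy::FDTensor>> prewarm(1);
  prewarm[0].resize(1);
  prewarm[0][0].Resize({1, 3, 224, 224}, fastdeploy::FDDataType::FP32);

  fastdeploy::Runtime runtime;
  return runtime.Compile(prewarm, option) ? 0 : -1;
}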
@@ -51,15 +51,6 @@ struct FASTDEPLOY_DECL Runtime {
    */
   bool Infer();

-  /** \brief Compile TorchScript Module, only for Poros backend
-   *
-   * \param[in] prewarm_tensors Prewarm datas for compile
-   * \param[in] _option Runtime option
-   * \return true if compile successed, otherwise false
-   */
-  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-               const RuntimeOption& _option);
-
   /** \brief Get number of inputs
    */
   int NumInputs() { return backend_->NumInputs(); }
@@ -94,6 +85,15 @@ struct FASTDEPLOY_DECL Runtime {

   RuntimeOption option;

+  /** \brief Compile TorchScript Module, only for Poros backend
+   *
+   * \param[in] prewarm_tensors Prewarm datas for compile
+   * \param[in] _option Runtime option
+   * \return true if compile successed, otherwise false
+   */
+  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
+               const RuntimeOption& _option);
+
  private:
   void CreateOrtBackend();
   void CreatePaddleBackend();
@@ -337,37 +337,6 @@ void RuntimeOption::SetOpenVINOStreams(int num_streams) {
   ov_num_streams = num_streams;
 }

-bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-                      const RuntimeOption& _option) {
-#ifdef ENABLE_POROS_BACKEND
-  option = _option;
-  auto poros_option = PorosBackendOption();
-  poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
-  poros_option.gpu_id = option.device_id;
-  poros_option.long_to_int = option.long_to_int;
-  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
-  poros_option.unconst_ops_thres = option.unconst_ops_thres;
-  poros_option.poros_file = option.poros_file;
-  poros_option.is_dynamic = option.is_dynamic;
-  poros_option.enable_fp16 = option.trt_enable_fp16;
-  poros_option.max_batch_size = option.trt_max_batch_size;
-  poros_option.max_workspace_size = option.trt_max_workspace_size;
-  FDASSERT(
-      option.model_format == ModelFormat::TORCHSCRIPT,
-      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
-  backend_ = utils::make_unique<PorosBackend>();
-  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
-  FDASSERT(
-      casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
-      "Load model from Torchscript failed while initliazing PorosBackend.");
-#else
-  FDASSERT(false,
-           "PorosBackend is not available, please compiled with "
-           "ENABLE_POROS_BACKEND=ON.");
-#endif
-  return true;
-}
-
 void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; }
 void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; }
@@ -203,8 +203,7 @@ FASTDEPLOY_DECL bool ReadBinaryFromFile(const std::string& file,
 FASTDEPLOY_DECL std::vector<int64_t>
 GetStride(const std::vector<int64_t>& dims);

-template <typename T, typename std::enable_if<std::is_integral<T>::value,
-                                              bool>::type = true>
+template <typename T>
 std::string Str(const std::vector<T>& shape) {
   std::ostringstream oss;
   oss << "[ " << shape[0];
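Dropping the std::enable_if integral constraint lets Str stringify any element type that has an operator<<, which is exactly what the new AutoSelectBackend needs for its std::vector<Backend> diagnostic. A standalone sketch of the relaxed template; only the first line of the body appears in the diff, so the loop and closing bracket here are a plausible reconstruction, not the exact FastDeploy source.

#include <sstream>
#include <string>
#include <vector>

template <typename T>
std::string Str(const std::vector<T>& shape) {
  std::ostringstream oss;
  oss << "[ " << shape[0];
  // Remaining elements and the closing bracket are guessed for the sketch.
  for (size_t i = 1; i < shape.size(); ++i) oss << ", " << shape[i];
  oss << " ]";
  return oss.str();
}

int main() {
  std::vector<int64_t> dims = {1, 3, 224, 224};
  std::string s = Str(dims);  // e.g. "[ 1, 3, 224, 224 ]"
  // With the constraint removed, enum types with operator<< (e.g. Backend) work too.
  return s.empty() ? 1 : 0;
}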