[Other] [Part2] Upgrade runtime module (#1080)
[Other] Upgrade runtime module
@@ -185,15 +185,15 @@ configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.
 configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc)
 file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc)
 file(GLOB_RECURSE FDTENSOR_FUNC_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cu)
-file(GLOB_RECURSE DEPLOY_OP_CUDA_KERNEL_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/op_cuda_kernels/*.cu)
-file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc)
-file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/paddle/*.cc)
-file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
-file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
-file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
-file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu2/*.cc)
-file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/sophgo/*.cc)
-file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
+file(GLOB_RECURSE DEPLOY_OP_CUDA_KERNEL_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/op_cuda_kernels/*.cu)
+file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/ort/*.cc)
+file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/paddle/*.cc)
+file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/poros/*.cc)
+file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/*.cpp)
+file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/openvino/*.cc)
+file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/rknpu2/*.cc)
+file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/sophgo/*.cc)
+file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/lite/*.cc)
 file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
 file(GLOB_RECURSE DEPLOY_ENCRYPTION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/encryption/*.cc)
 file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pipeline/*.cc)
@@ -289,7 +289,7 @@ if(ENABLE_POROS_BACKEND)
   else ()
     message(STATUS "site-packages: ${Python3_SITELIB}")
   endif ()
-  include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/common)
+  include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/poros/common)
   # find trt
   if(NOT WITH_GPU)
     message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must set -DWITH_GPU=ON, but now it's OFF")
@@ -377,7 +377,7 @@ if(ENABLE_TRT_BACKEND)
 
   add_definitions(-DENABLE_TRT_BACKEND)
   include_directories(${TRT_INC_DIR})
-  include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/common)
+  include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/common)
   list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TRT_SRCS})
   find_library(TRT_INFER_LIB nvinfer ${TRT_LIB_DIR} NO_DEFAULT_PATH)
   find_library(TRT_ONNX_LIB nvonnxparser ${TRT_LIB_DIR} NO_DEFAULT_PATH)
@@ -574,7 +574,7 @@ install(
   DESTINATION ${CMAKE_INSTALL_PREFIX}/include
   FILES_MATCHING
   PATTERN "*.h"
-  PATTERN "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/*/*.h"
+  PATTERN "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/*/*.h"
 )
 install(
   DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install
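A practical consequence of the install-rule change above: backend headers appear to be installed only under fastdeploy/runtime/backends/ after this change, so out-of-tree code that includes a backend header directly has to update its include path. A minimal sketch (ort_backend.h is an arbitrary example; any backend header follows the same pattern):

// Old location, seemingly no longer installed after this change:
// #include "fastdeploy/backends/ort/ort_backend.h"

// New location under the runtime module:
#include "fastdeploy/runtime/backends/ort/ort_backend.h"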
fastdeploy/fastdeploy_model.cc (48 changes; Executable file → Normal file)
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "fastdeploy/fastdeploy_model.h"
+
 #include "fastdeploy/utils/utils.h"
 
 namespace fastdeploy {
@@ -42,8 +43,7 @@ bool IsSupported(const std::vector<Backend>& backends, Backend backend) {
 bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
   if (!IsBackendAvailable(runtime_option.backend)) {
     FDERROR << runtime_option.backend
-            << " is not compiled with current FastDeploy library."
-            << std::endl;
+            << " is not compiled with current FastDeploy library." << std::endl;
     return false;
   }
 
@@ -57,42 +57,58 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
 
   if (use_gpu) {
     if (!IsSupported(valid_gpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid gpu backends of model " << ModelName() << " are " << Str(valid_gpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid gpu backends of model " << ModelName() << " are "
+              << Str(valid_gpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
       return false;
     }
   } else if (use_rknpu) {
     if (!IsSupported(valid_rknpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid rknpu backends of model " << ModelName() << " are " << Str(valid_rknpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid rknpu backends of model " << ModelName() << " are "
+              << Str(valid_rknpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
       return false;
     }
   } else if (use_sophgotpu) {
     if (!IsSupported(valid_sophgonpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid rknpu backends of model " << ModelName() << " are " << Str(valid_rknpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid rknpu backends of model " << ModelName() << " are "
+              << Str(valid_rknpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
       return false;
     }
   } else if (use_timvx) {
     if (!IsSupported(valid_timvx_backends, runtime_option.backend)) {
-      FDERROR << "The valid timvx backends of model " << ModelName() << " are " << Str(valid_timvx_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid timvx backends of model " << ModelName() << " are "
+              << Str(valid_timvx_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
       return false;
     }
   } else if (use_ascend) {
     if (!IsSupported(valid_ascend_backends, runtime_option.backend)) {
-      FDERROR << "The valid ascend backends of model " << ModelName() << " are " << Str(valid_ascend_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid ascend backends of model " << ModelName() << " are "
+              << Str(valid_ascend_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
       return false;
     }
   } else if (use_kunlunxin) {
     if (!IsSupported(valid_kunlunxin_backends, runtime_option.backend)) {
-      FDERROR << "The valid kunlunxin backends of model " << ModelName() << " are " << Str(valid_kunlunxin_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid kunlunxin backends of model " << ModelName()
+              << " are " << Str(valid_kunlunxin_backends) << ", "
+              << runtime_option.backend << " is not supported." << std::endl;
       return false;
     }
-  } else if(use_ipu) {
+  } else if (use_ipu) {
     if (!IsSupported(valid_ipu_backends, runtime_option.backend)) {
-      FDERROR << "The valid ipu backends of model " << ModelName() << " are " << Str(valid_ipu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid ipu backends of model " << ModelName() << " are "
+              << Str(valid_ipu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
       return false;
     }
   } else {
     if (!IsSupported(valid_cpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid cpu backends of model " << ModelName() << " are " << Str(valid_cpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid cpu backends of model " << ModelName() << " are "
+              << Str(valid_cpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
       return false;
     }
   }
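For readers skimming the hunk above: every branch follows the same pattern, look up the device-specific whitelist, then ask IsSupported whether the requested backend is in it. A self-contained sketch of that pattern (the enum values and whitelist contents here are illustrative stand-ins, not the FastDeploy definitions):

#include <algorithm>
#include <iostream>
#include <vector>

// Illustrative stand-in for FastDeploy's Backend enum.
enum class Backend { ORT, TRT, PDINFER, OPENVINO, LITE };

// Same shape as the helper named in the hunk header above.
bool IsSupported(const std::vector<Backend>& backends, Backend backend) {
  return std::find(backends.begin(), backends.end(), backend) !=
         backends.end();
}

int main() {
  // In the real code this whitelist is a member such as valid_gpu_backends.
  std::vector<Backend> valid_gpu_backends = {Backend::ORT, Backend::TRT};
  Backend requested = Backend::OPENVINO;
  if (!IsSupported(valid_gpu_backends, requested)) {
    std::cerr << "requested backend is not in the GPU whitelist\n";
    return 1;
  }
  return 0;
}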
@@ -135,16 +151,12 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
     return false;
 #endif
   }
-  FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND now." << std::endl;
+  FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND now."
+          << std::endl;
   return false;
 }
 
 bool FastDeployModel::InitRuntime() {
   if (!runtime_option.model_from_memory_) {
     FDASSERT(
         CheckModelFormat(runtime_option.model_file, runtime_option.model_format),
         "ModelFormatCheck Failed.");
   }
   if (runtime_initialized_) {
     FDERROR << "The model is already initialized, cannot be initliazed again."
             << std::endl;
@@ -298,7 +310,6 @@ bool FastDeployModel::CreateKunlunXinBackend() {
   return false;
 }
 
-
 bool FastDeployModel::CreateASCENDBackend() {
   if (valid_ascend_backends.size() == 0) {
     FDERROR << "There's no valid ascend backends for model: " << ModelName()
@@ -322,7 +333,6 @@ bool FastDeployModel::CreateASCENDBackend() {
   return false;
 }
 
-
 bool FastDeployModel::CreateIpuBackend() {
   if (valid_ipu_backends.size() == 0) {
     FDERROR << "There's no valid ipu backends for model: " << ModelName()
@@ -11,8 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "fastdeploy/backends/rknpu2/option.h"
 #include "fastdeploy/pybind/main.h"
+#include "fastdeploy/runtime/backends/rknpu2/option.h"
 namespace fastdeploy {
 void BindRKNPU2Config(pybind11::module& m) {
   pybind11::enum_<fastdeploy::rknpu2::CpuName>(
fastdeploy/backends/lite/lite_backend.cc → fastdeploy/runtime/backends/lite/lite_backend.cc (115 changes; Executable file → Normal file)
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/backends/lite/lite_backend.h"
+#include "fastdeploy/runtime/backends/lite/lite_backend.h"
 
 #include <cstring>
 
@@ -43,18 +43,18 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   option_ = option;
   std::vector<paddle::lite_api::Place> valid_places;
   if (option_.enable_int8) {
-    if(option_.enable_kunlunxin) {
+    if (option_.enable_kunlunxin) {
       valid_places.push_back(
           paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
     } else {
       valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
+          paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
     }
     FDINFO << "Lite::Backend enable_int8 option is ON ! Lite::Backend will "
            << "inference with int8 precision!" << std::endl;
   }
   if (option_.enable_fp16) {
-    if(option_.enable_kunlunxin){
+    if (option_.enable_kunlunxin) {
       valid_places.push_back(
           paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
     } else {
@@ -66,7 +66,9 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
       if (supported_fp16_) {
         valid_places.push_back(
             paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
-        FDINFO << "The device supports FP16, Lite::Backend will inference with FP16 precision." << std::endl;
+        FDINFO << "The device supports FP16, Lite::Backend will inference with "
+                  "FP16 precision."
+               << std::endl;
       } else {
         FDWARNING << "The device doesn't support FP16, will fallback to FP32.";
       }
@@ -74,49 +76,51 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   }
   if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
     std::vector<char> nnadapter_subgraph_partition_config_buffer;
-    if (ReadFile(option_.nnadapter_subgraph_partition_config_path, &nnadapter_subgraph_partition_config_buffer, false)) {
+    if (ReadFile(option_.nnadapter_subgraph_partition_config_path,
+                 &nnadapter_subgraph_partition_config_buffer, false)) {
       if (!nnadapter_subgraph_partition_config_buffer.empty()) {
-        std::string nnadapter_subgraph_partition_config_string(nnadapter_subgraph_partition_config_buffer.data(), nnadapter_subgraph_partition_config_buffer.size());
-        config_.set_nnadapter_subgraph_partition_config_buffer(nnadapter_subgraph_partition_config_string);
+        std::string nnadapter_subgraph_partition_config_string(
+            nnadapter_subgraph_partition_config_buffer.data(),
+            nnadapter_subgraph_partition_config_buffer.size());
+        config_.set_nnadapter_subgraph_partition_config_buffer(
+            nnadapter_subgraph_partition_config_string);
       }
     }
   }
-  if(option_.enable_timvx) {
+  if (option_.enable_timvx) {
     config_.set_nnadapter_device_names({"verisilicon_timvx"});
     valid_places.push_back(
-      paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
+        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
     valid_places.push_back(
         paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
     valid_places.push_back(
        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
   }
 
-  if(option_.enable_ascend){
-
-    if(option_.nnadapter_device_names.empty()){
+  if (option_.enable_ascend) {
+    if (option_.nnadapter_device_names.empty()) {
       config_.set_nnadapter_device_names({"huawei_ascend_npu"});
     } else {
       config_.set_nnadapter_device_names(option_.nnadapter_device_names);
     }
 
-    if(!option_.nnadapter_context_properties.empty()){
-      config_.set_nnadapter_context_properties(option_.nnadapter_context_properties);
+    if (!option_.nnadapter_context_properties.empty()) {
+      config_.set_nnadapter_context_properties(
+          option_.nnadapter_context_properties);
     }
 
-    if(!option_.nnadapter_model_cache_dir.empty()){
+    if (!option_.nnadapter_model_cache_dir.empty()) {
       config_.set_nnadapter_model_cache_dir(option_.nnadapter_model_cache_dir);
     }
 
-    if(!option_.nnadapter_mixed_precision_quantization_config_path.empty()){
+    if (!option_.nnadapter_mixed_precision_quantization_config_path.empty()) {
       config_.set_nnadapter_mixed_precision_quantization_config_path(
-        option_.nnadapter_mixed_precision_quantization_config_path
-      );
+          option_.nnadapter_mixed_precision_quantization_config_path);
     }
 
-    if(!option_.nnadapter_subgraph_partition_config_path.empty()){
+    if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
       config_.set_nnadapter_subgraph_partition_config_path(
-        option_.nnadapter_subgraph_partition_config_path
-      );
+          option_.nnadapter_subgraph_partition_config_path);
     }
 
     valid_places.push_back(
@@ -127,22 +131,26 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
         paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
   }
 
-  if(option_.enable_kunlunxin){
+  if (option_.enable_kunlunxin) {
     valid_places.push_back(
-      paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
+        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
     valid_places.push_back(
-      paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)});
+        paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)});
     config_.set_xpu_dev_per_thread(option_.device_id);
-    config_.set_xpu_workspace_l3_size_per_thread(option_.kunlunxin_l3_workspace_size);
-    config_.set_xpu_l3_cache_method(option_.kunlunxin_l3_workspace_size, option_.kunlunxin_locked);
-    config_.set_xpu_conv_autotune(option_.kunlunxin_autotune, option_.kunlunxin_autotune_file);
-    config_.set_xpu_multi_encoder_method(option_.kunlunxin_precision, option_.kunlunxin_adaptive_seqlen);
+    config_.set_xpu_workspace_l3_size_per_thread(
+        option_.kunlunxin_l3_workspace_size);
+    config_.set_xpu_l3_cache_method(option_.kunlunxin_l3_workspace_size,
+                                    option_.kunlunxin_locked);
+    config_.set_xpu_conv_autotune(option_.kunlunxin_autotune,
+                                  option_.kunlunxin_autotune_file);
+    config_.set_xpu_multi_encoder_method(option_.kunlunxin_precision,
+                                         option_.kunlunxin_adaptive_seqlen);
     if (option_.kunlunxin_enable_multi_stream) {
       config_.enable_xpu_multi_stream();
     }
   } else {
     valid_places.push_back(
-      paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
+        paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
   }
   config_.set_valid_places(valid_places);
   if (option_.threads > 0) {
@@ -155,12 +163,11 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
 }
 
 bool LiteBackend::ReadFile(const std::string& filename,
-                           std::vector<char>* contents,
-                           const bool binary) {
-  FILE *fp = fopen(filename.c_str(), binary ? "rb" : "r");
-  if (!fp){
+                           std::vector<char>* contents, const bool binary) {
+  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
+  if (!fp) {
     FDERROR << "Cannot open file " << filename << "." << std::endl;
     return false;
   }
   fseek(fp, 0, SEEK_END);
   size_t size = ftell(fp);
@@ -168,7 +175,7 @@ bool LiteBackend::ReadFile(const std::string& filename,
   contents->clear();
   contents->resize(size);
   size_t offset = 0;
-  char *ptr = reinterpret_cast<char *>(&(contents->at(0)));
+  char* ptr = reinterpret_cast<char*>(&(contents->at(0)));
   while (offset < size) {
     size_t already_read = fread(ptr, 1, size - offset, fp);
     offset += already_read;
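The ReadFile shown above loops on fread because a single call may return fewer bytes than requested. A standalone sketch of the same contract, for illustration only (LiteBackend::ReadFile itself is a member function; this free-function variant merely mirrors its behavior):

#include <cstdio>
#include <string>
#include <vector>

// Read a whole file into *contents; returns false on open failure.
bool ReadFileDemo(const std::string& filename, std::vector<char>* contents,
                  bool binary) {
  FILE* fp = std::fopen(filename.c_str(), binary ? "rb" : "r");
  if (!fp) return false;
  std::fseek(fp, 0, SEEK_END);
  size_t size = std::ftell(fp);
  std::fseek(fp, 0, SEEK_SET);
  contents->clear();
  contents->resize(size);
  size_t offset = 0;
  // fread may return fewer bytes than requested, so loop until done.
  while (offset < size) {
    size_t n = std::fread(contents->data() + offset, 1, size - offset, fp);
    if (n == 0) break;
    offset += n;
  }
  std::fclose(fp);
  return offset == size;
}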
@@ -196,8 +203,9 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
   if (option_.optimized_model_dir != "") {
     FDINFO << "Optimzed model dir is not empty, will save optimized model to: "
            << option_.optimized_model_dir << std::endl;
-    predictor_->SaveOptimizedModel(option_.optimized_model_dir,
-        paddle::lite_api::LiteModelType::kNaiveBuffer);
+    predictor_->SaveOptimizedModel(
+        option_.optimized_model_dir,
+        paddle::lite_api::LiteModelType::kNaiveBuffer);
   }
 
   inputs_desc_.clear();
@@ -221,7 +229,7 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
     auto shape = tensor->shape();
     info.shape.assign(shape.begin(), shape.end());
     info.name = output_names[i];
-    if(!option_.enable_kunlunxin){
+    if (!option_.enable_kunlunxin) {
       info.dtype = LiteDataTypeToFD(tensor->precision());
     }
     outputs_desc_.emplace_back(info);
@@ -250,8 +258,7 @@ TensorInfo LiteBackend::GetOutputInfo(int index) {
 std::vector<TensorInfo> LiteBackend::GetOutputInfos() { return outputs_desc_; }
 
 bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
-                        std::vector<FDTensor>* outputs,
-                        bool copy_to_fd) {
+                        std::vector<FDTensor>* outputs, bool copy_to_fd) {
   if (inputs.size() != inputs_desc_.size()) {
     FDERROR << "[LiteBackend] Size of inputs(" << inputs.size()
             << ") should keep same with the inputs of this model("
@@ -270,25 +277,25 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
     tensor->Resize(inputs[i].shape);
     if (inputs[i].dtype == FDDataType::FP32) {
       tensor->CopyFromCpu<float, paddle::lite_api::TargetType::kHost>(
-          reinterpret_cast<const float*>(const_cast<void*>(
-              inputs[i].CpuData())));
+          reinterpret_cast<const float*>(
+              const_cast<void*>(inputs[i].CpuData())));
     } else if (inputs[i].dtype == FDDataType::INT32) {
       tensor->CopyFromCpu<int, paddle::lite_api::TargetType::kHost>(
-          reinterpret_cast<const int*>(const_cast<void*>(
-              inputs[i].CpuData())));
+          reinterpret_cast<const int*>(const_cast<void*>(inputs[i].CpuData())));
     } else if (inputs[i].dtype == FDDataType::INT8) {
       tensor->CopyFromCpu<int8_t, paddle::lite_api::TargetType::kHost>(
-          reinterpret_cast<const int8_t*>(const_cast<void*>(
-              inputs[i].CpuData())));
+          reinterpret_cast<const int8_t*>(
+              const_cast<void*>(inputs[i].CpuData())));
     } else if (inputs[i].dtype == FDDataType::UINT8) {
       tensor->CopyFromCpu<uint8_t, paddle::lite_api::TargetType::kHost>(
-          reinterpret_cast<const uint8_t*>(const_cast<void*>(
-              inputs[i].CpuData())));
+          reinterpret_cast<const uint8_t*>(
+              const_cast<void*>(inputs[i].CpuData())));
     } else if (inputs[i].dtype == FDDataType::INT64) {
-#if (defined(__aarch64__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_ARM64))
+#if (defined(__aarch64__) || defined(__x86_64__) || defined(_M_X64) || \
+     defined(_M_ARM64))
       tensor->CopyFromCpu<int64_t, paddle::lite_api::TargetType::kHost>(
-          reinterpret_cast<const int64_t*>(const_cast<void*>(
-              inputs[i].CpuData())));
+          reinterpret_cast<const int64_t*>(
+              const_cast<void*>(inputs[i].CpuData())));
 #else
       FDASSERT(false, "FDDataType::INT64 is not support for x86/armv7 now!");
 #endif
@@ -302,7 +309,7 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
   outputs->resize(outputs_desc_.size());
   for (size_t i = 0; i < outputs_desc_.size(); ++i) {
     auto tensor = predictor_->GetOutput(i);
-    if(outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())){
+    if (outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())) {
       outputs_desc_[i].dtype = LiteDataTypeToFD(tensor->precision());
     }
     (*outputs)[i].Resize(tensor->shape(), outputs_desc_[i].dtype,
@@ -19,8 +19,8 @@
 #include <string>
 #include <vector>
 
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/lite/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/lite/option.h"
 #include "paddle_api.h"  // NOLINT
 
 namespace fastdeploy {
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/backends/openvino/ov_backend.h"
+#include "fastdeploy/runtime/backends/openvino/ov_backend.h"
 #ifdef ENABLE_PADDLE_FRONTEND
 #include "paddle2onnx/converter.h"
 #endif
@@ -19,9 +19,9 @@
 #include <string>
 #include <vector>
 
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
 #include "fastdeploy/utils/unique_ptr.h"
-#include "fastdeploy/backends/openvino/option.h"
+#include "fastdeploy/runtime/backends/openvino/option.h"
 #include "openvino/openvino.hpp"
 
 namespace fastdeploy {
fastdeploy/backends/ort/ops/adaptive_pool2d.cc → fastdeploy/runtime/backends/ort/ops/adaptive_pool2d.cc (6 changes; Executable file → Normal file)
@@ -104,9 +104,9 @@ void AdaptivePool2dKernel::GetAttribute(const OrtKernelInfo* info) {
       ort_.KernelInfoGetAttribute<std::string>(info, "pooling_type");
   output_size_ =
       ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "output_size");
-  FDASSERT(output_size_.size() == 4 && output_size_[2] > 0 &&
-               output_size_[3] > 0,
-           "The output size of adaptive pool must be positive.");
+  FDASSERT(
+      output_size_.size() == 4 && output_size_[2] > 0 && output_size_[3] > 0,
+      "The output size of adaptive pool must be positive.");
 }
 }  // namespace fastdeploy
@@ -25,7 +25,7 @@
 #include "onnxruntime_cxx_api.h"  // NOLINT
 
 #ifdef WITH_GPU
-#include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
+#include "fastdeploy/runtime/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
 #endif
 
 namespace fastdeploy {
@@ -14,10 +14,12 @@
 
 #ifndef NON_64_PLATFORM
 
-#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
+#include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h"
+
+#include <algorithm>
 
 #include "fastdeploy/core/fd_tensor.h"
 #include "fastdeploy/utils/utils.h"
-#include <algorithm>
 
 namespace fastdeploy {
fastdeploy/backends/ort/ort_backend.cc → fastdeploy/runtime/backends/ort/ort_backend.cc (8 changes; Executable file → Normal file)
@@ -12,14 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/backends/ort/ort_backend.h"
+#include "fastdeploy/runtime/backends/ort/ort_backend.h"
 
 #include <memory>
 
-#include "fastdeploy/backends/ort/ops/adaptive_pool2d.h"
-#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
-#include "fastdeploy/backends/ort/utils.h"
 #include "fastdeploy/core/float16.h"
+#include "fastdeploy/runtime/backends/ort/ops/adaptive_pool2d.h"
+#include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h"
+#include "fastdeploy/runtime/backends/ort/utils.h"
 #include "fastdeploy/utils/utils.h"
 #ifdef ENABLE_PADDLE_FRONTEND
 #include "paddle2onnx/converter.h"
@@ -20,8 +20,8 @@
 #include <vector>
 #include <map>
 
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/ort/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/ort/option.h"
 #include "onnxruntime_cxx_api.h"  // NOLINT
 
 namespace fastdeploy {
@@ -12,7 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/backends/ort/utils.h"
+#include "fastdeploy/runtime/backends/ort/utils.h"
+
 #include "fastdeploy/utils/utils.h"
 
 namespace fastdeploy {
@@ -19,7 +19,7 @@
 #include <string>
 #include <vector>
 
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
 #include "onnxruntime_cxx_api.h"  // NOLINT
 
 namespace fastdeploy {
@@ -19,7 +19,7 @@
 #include <memory>
 #include <string>
 #include <vector>
-#include "fastdeploy/backends/tensorrt/option.h"
+#include "fastdeploy/runtime/backends/tensorrt/option.h"
 
 
 namespace fastdeploy {
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/backends/paddle/paddle_backend.h"
+#include "fastdeploy/runtime/backends/paddle/paddle_backend.h"
 
 #include <sstream>
 
@@ -19,8 +19,8 @@
 #include <string>
 #include <vector>
 
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/paddle/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/paddle/option.h"
 #ifdef ENABLE_PADDLE_FRONTEND
 #include "paddle2onnx/converter.h"
 #endif
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/backends/paddle/paddle_backend.h"
 #include "fastdeploy/core/float16.h"
+#include "fastdeploy/runtime/backends/paddle/paddle_backend.h"
 
 namespace fastdeploy {
 paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device) {
@@ -19,9 +19,9 @@
 #include <string>
 #include <unordered_map>
 
-#include "iengine.h"
-#include "poros_module.h"
-#include "torch/script.h"
+#include "iengine.h"       // NOLINT
+#include "poros_module.h"  // NOLINT
+#include "torch/script.h"  // NOLINT
 
 namespace baidu {
 namespace mirana {
@@ -16,12 +16,12 @@
 
 #include <string>
 
-//from pytorch
-#include "ATen/core/interned_strings.h"
-#include "torch/csrc/jit/ir/ir.h"
-#include "torch/script.h"
+// from pytorch
+#include "ATen/core/interned_strings.h"  // NOLINT
+#include "torch/csrc/jit/ir/ir.h"        // NOLINT
+#include "torch/script.h"                // NOLINT
 
-#include "plugin_create.h"
+#include "plugin_create.h"  // NOLINT
 
 namespace baidu {
 namespace mirana {
@@ -36,7 +36,7 @@ IPlugin* create_plugin(const std::string& plugin_name,
 
 void create_all_plugins(const plugin_creator_map_t& plugin_creator_map,
                         std::unordered_map<std::string, IPlugin*>& plugin_m);
-//void create_all_plugins(std::unordered_map<std::string, IPlugin*>& plugin_m);
+// void create_all_plugins(std::unordered_map<std::string, IPlugin*>& plugin_m);
 
 template <typename PluginType> IPlugin* default_plugin_creator() {
   return new (std::nothrow) PluginType;
@@ -14,8 +14,8 @@
 
 #pragma once
 
-#include "torch/csrc/jit/jit_log.h"
-#include "torch/script.h"
+#include "torch/csrc/jit/jit_log.h"  // NOLINT
+#include "torch/script.h"            // NOLINT
 #include <string>
 // #include "ATen/Context.h"
 
@@ -37,20 +37,21 @@ struct PorosOptions {
   bool use_nvidia_tf32 = false;
 };
 
 
 class PorosModule : public torch::jit::Module {
  public:
-  PorosModule(torch::jit::Module module) : torch::jit::Module(module) {}
+  PorosModule(torch::jit::Module module) : torch::jit::Module(module) {}  // NOLINT
   ~PorosModule() = default;
 
   void to_device(Device device) { _options.device = device; }
 
-  //c10::IValue forward(std::vector<c10::IValue> inputs);
-  //void save(const std::string& filename);
+  // c10::IValue forward(std::vector<c10::IValue> inputs);
+  // void save(const std::string& filename);
  public:
   PorosOptions _options;
 };
 
-//via porosmodule.save
+// via porosmodule.save
 std::unique_ptr<PorosModule> Load(const std::string& filename,
                                   const PorosOptions& options);
fastdeploy/backends/poros/poros_backend.cc → fastdeploy/runtime/backends/poros/poros_backend.cc (3 changes; Executable file → Normal file)
@@ -12,7 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/backends/poros/poros_backend.h"
+#include "fastdeploy/runtime/backends/poros/poros_backend.h"
+
 #include <sys/time.h>
 
 namespace fastdeploy {
@@ -19,10 +19,10 @@
 #include <string>
 #include <vector>
 
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/poros/option.h"
-#include "fastdeploy/backends/poros/common/compile.h"
-#include "fastdeploy/backends/poros/common/poros_module.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/poros/option.h"
+#include "fastdeploy/runtime/backends/poros/common/compile.h"
+#include "fastdeploy/runtime/backends/poros/common/poros_module.h"
 
 namespace fastdeploy {
 
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/backends/poros/poros_backend.h"
+#include "fastdeploy/runtime/backends/poros/poros_backend.h"
 
 #ifdef WITH_GPU
 #include <cuda_runtime_api.h>
@@ -23,32 +23,32 @@ namespace fastdeploy {
 std::string AtType2String(const at::ScalarType& dtype) {
   std::string out;
   switch (dtype) {
-  case at::kByte:
-    out = "at::kByte";
-    break;
-  case at::kChar:
-    out = "at::kChar";
-    break;
-  case at::kShort:
-    out = "at::kShort";
-    break;
-  case at::kInt:
-    out = "at::kInt";
-    break;
-  case at::kLong:
-    out = "at::kLong";
-    break;
-  case at::kHalf:
-    out = "at::kHalf";
-    break;
-  case at::kFloat:
-    out = "at::kFloat";
-    break;
-  case at::kDouble:
-    out = "at::kDouble";
-    break;
-  default:
-    out = "at::UNKNOWN";
+    case at::kByte:
+      out = "at::kByte";
+      break;
+    case at::kChar:
+      out = "at::kChar";
+      break;
+    case at::kShort:
+      out = "at::kShort";
+      break;
+    case at::kInt:
+      out = "at::kInt";
+      break;
+    case at::kLong:
+      out = "at::kLong";
+      break;
+    case at::kHalf:
+      out = "at::kHalf";
+      break;
+    case at::kFloat:
+      out = "at::kFloat";
+      break;
+    case at::kDouble:
+      out = "at::kDouble";
+      break;
+    default:
+      out = "at::UNKNOWN";
   }
   return out;
 }
@@ -129,8 +129,9 @@ at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda) {
                 numel * sizeof(double));
     }
   } else {
-    FDASSERT(false, "Unrecognized data type while calling "
-                    "PorosBackend::CreatePorosValue().");
+    FDASSERT(false,
+             "Unrecognized data type while calling "
+             "PorosBackend::CreatePorosValue().");
   }
   return poros_value;
 }
@@ -11,7 +11,8 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "fastdeploy/backends/rknpu2/rknpu2_backend.h"
+#include "fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h"
+
 #include "fastdeploy/utils/perf.h"
 namespace fastdeploy {
 RKNPU2Backend::~RKNPU2Backend() {
@@ -76,9 +77,8 @@ void RKNPU2Backend::BuildOption(const RKNPU2BackendOption& option) {
 /***************************************************************
  * @name InitFromRKNN
  * @brief Initialize RKNN model
- * @param model_file: Binary data for the RKNN model or the path of RKNN model.
- *             params_file: None
- *             option: config
+ * @param model_file: Binary data for the RKNN model or the path of RKNN
+ *model. params_file: None option: config
  * @return bool
  * @note None
 ***************************************************************/
@@ -232,8 +232,8 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
       return false;
     }
 
-    // If the output dimension is 3, the runtime will automatically change it to 4.
-    // Obviously, this is wrong, and manual correction is required here.
+    // If the output dimension is 3, the runtime will automatically change it
+    // to 4. Obviously, this is wrong, and manual correction is required here.
     int n_dims = output_attrs_[i].n_dims;
     if ((n_dims == 4) && (output_attrs_[i].dims[3] == 1)) {
       n_dims--;
@@ -263,14 +263,15 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
  * @note None
 ***************************************************************/
 void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
-  printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
-         "n_elems=%d, size=%d, fmt=%s, type=%s, "
-         "qnt_type=%s, zp=%d, scale=%f, pass_through=%d\n",
-         attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
-         attr.dims[2], attr.dims[3], attr.n_elems, attr.size,
-         get_format_string(attr.fmt), get_type_string(attr.type),
-         get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale,
-         attr.pass_through);
+  printf(
+      "index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
+      "n_elems=%d, size=%d, fmt=%s, type=%s, "
+      "qnt_type=%s, zp=%d, scale=%f, pass_through=%d\n",
+      attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
+      attr.dims[2], attr.dims[3], attr.n_elems, attr.size,
+      get_format_string(attr.fmt), get_type_string(attr.type),
+      get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale,
+      attr.pass_through);
 }
 
 TensorInfo RKNPU2Backend::GetInputInfo(int index) {
@@ -357,7 +358,8 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
       // The data type of output data is changed to FP32
       output_attrs_[i].type = RKNN_TENSOR_FLOAT32;
 
-      // default output type is depend on model, this requires float32 to compute top5
+      // default output type is depend on model, this requires float32 to
+      // compute top5
       ret = rknn_set_io_mem(ctx, output_mems_[i], &output_attrs_[i]);
 
       // set output memory and attribute
@@ -452,8 +454,8 @@ FDDataType RKNPU2Backend::RknnTensorTypeToFDDataType(rknn_tensor_type type) {
  * @return None
  * @note None
 ***************************************************************/
-rknn_tensor_type
-RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) {
+rknn_tensor_type RKNPU2Backend::FDDataTypeToRknnTensorType(
+    fastdeploy::FDDataType type) {
   if (type == FDDataType::FP16) {
     return rknn_tensor_type::RKNN_TENSOR_FLOAT16;
   }
@@ -13,8 +13,8 @@
 // limitations under the License.
 #pragma once
 
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/rknpu2/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/rknpu2/option.h"
 #include "fastdeploy/core/fd_tensor.h"
 #include "rknn_api.h"  // NOLINT
 #include <cstring>
@@ -11,7 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "fastdeploy/backends/sophgo/sophgo_backend.h"
+#include "fastdeploy/runtime/backends/sophgo/sophgo_backend.h"
 
 #include <assert.h>
 
@@ -13,11 +13,11 @@
 // limitations under the License.
 #pragma once
 
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
 #include "fastdeploy/core/fd_tensor.h"
 #include "bmruntime_interface.h"  // NOLINT
 #include "bmlib_runtime.h"  // NOLINT
-#include "fastdeploy/backends/sophgo/option.h"
+#include "fastdeploy/runtime/backends/sophgo/option.h"
 #include <cstring>
 #include <iostream>
 #include <memory>
fastdeploy/backends/tensorrt/ops/adaptive_pool2d.cc → fastdeploy/runtime/backends/tensorrt/ops/adaptive_pool2d.cc (7 changes; Executable file → Normal file)
@@ -97,10 +97,9 @@ void AdaptivePool2d::serialize(void* buffer) const noexcept {
   FDASSERT(d == a + getSerializationSize(), "d == a + getSerializationSize()");
 }
 
-nvinfer1::DataType
-AdaptivePool2d::getOutputDataType(int index,
-                                  const nvinfer1::DataType* inputType,
-                                  int nbInputs) const noexcept {
+nvinfer1::DataType AdaptivePool2d::getOutputDataType(
+    int index, const nvinfer1::DataType* inputType,
+    int nbInputs) const noexcept {
   return inputType[0];
 }
 
@@ -14,7 +14,7 @@
 
 #pragma once
 #include "common.h"  // NOLINT
-#include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
+#include "fastdeploy/runtime/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
 
 namespace fastdeploy {
 
fastdeploy/backends/tensorrt/trt_backend.cc → fastdeploy/runtime/backends/tensorrt/trt_backend.cc (23 changes; Executable file → Normal file)
@@ -12,13 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/backends/tensorrt/trt_backend.h"
-#include "fastdeploy/function/cuda_cast.h"
+#include "fastdeploy/runtime/backends/tensorrt/trt_backend.h"
 
 #include <cstring>
 #include <unordered_map>
 
 #include "NvInferRuntime.h"
+#include "fastdeploy/function/cuda_cast.h"
 #include "fastdeploy/utils/utils.h"
 #ifdef ENABLE_PADDLE_FRONTEND
 #include "paddle2onnx/converter.h"
@@ -215,9 +215,9 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
   outputs_desc_.resize(onnx_reader.num_outputs);
   for (int i = 0; i < onnx_reader.num_inputs; ++i) {
     std::string name(onnx_reader.inputs[i].name);
-    std::vector<int64_t> shape(onnx_reader.inputs[i].shape,
-                               onnx_reader.inputs[i].shape +
-                                   onnx_reader.inputs[i].rank);
+    std::vector<int64_t> shape(
+        onnx_reader.inputs[i].shape,
+        onnx_reader.inputs[i].shape + onnx_reader.inputs[i].rank);
     inputs_desc_[i].name = name;
     inputs_desc_[i].shape.assign(shape.begin(), shape.end());
     inputs_desc_[i].dtype = ReaderDtypeToTrtDtype(onnx_reader.inputs[i].dtype);
@@ -238,9 +238,9 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
 
   for (int i = 0; i < onnx_reader.num_outputs; ++i) {
     std::string name(onnx_reader.outputs[i].name);
-    std::vector<int64_t> shape(onnx_reader.outputs[i].shape,
-                               onnx_reader.outputs[i].shape +
-                                   onnx_reader.outputs[i].rank);
+    std::vector<int64_t> shape(
+        onnx_reader.outputs[i].shape,
+        onnx_reader.outputs[i].shape + onnx_reader.outputs[i].rank);
     outputs_desc_[i].name = name;
     outputs_desc_[i].shape.assign(shape.begin(), shape.end());
     outputs_desc_[i].dtype =
@@ -313,8 +313,8 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
     return false;
   }
   for (size_t i = 0; i < outputs->size(); ++i) {
-    // if the final output tensor's dtype is different from the model output tensor's dtype,
-    // then we need cast the data to the final output's dtype
+    // if the final output tensor's dtype is different from the model output
+    // tensor's dtype, then we need cast the data to the final output's dtype
     auto model_output_dtype =
         GetFDDataType(outputs_device_buffer_[(*outputs)[i].name].dtype());
     if ((*outputs)[i].dtype != model_output_dtype) {
@@ -369,7 +369,8 @@ void TrtBackend::GetInputOutputInfo() {
         outputs_desc_[i].original_dtype;
   }
 
-  // Re-read the tensor infos from TRT model and write into inputs_desc_ and outputs_desc_
+  // Re-read the tensor infos from TRT model and write into inputs_desc_ and
+  // outputs_desc_
   std::vector<TrtValueInfo>().swap(inputs_desc_);
   std::vector<TrtValueInfo>().swap(outputs_desc_);
   inputs_desc_.clear();
@@ -23,9 +23,9 @@
 
 #include "NvInfer.h"
 #include "NvOnnxParser.h"
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/tensorrt/utils.h"
-#include "fastdeploy/backends/tensorrt/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/tensorrt/utils.h"
+#include "fastdeploy/runtime/backends/tensorrt/option.h"
 #include "fastdeploy/utils/unique_ptr.h"
 
 class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/backends/tensorrt/utils.h"
+#include "fastdeploy/runtime/backends/tensorrt/utils.h"
 
 namespace fastdeploy {
 
@@ -82,4 +82,43 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
   out << "UNKNOWN-ModelFormat";
   return out;
 }
+
+std::vector<Backend> GetAvailableBackends() {
+  std::vector<Backend> backends;
+#ifdef ENABLE_ORT_BACKEND
+  backends.push_back(Backend::ORT);
+#endif
+#ifdef ENABLE_TRT_BACKEND
+  backends.push_back(Backend::TRT);
+#endif
+#ifdef ENABLE_PADDLE_BACKEND
+  backends.push_back(Backend::PDINFER);
+#endif
+#ifdef ENABLE_POROS_BACKEND
+  backends.push_back(Backend::POROS);
+#endif
+#ifdef ENABLE_OPENVINO_BACKEND
+  backends.push_back(Backend::OPENVINO);
+#endif
+#ifdef ENABLE_LITE_BACKEND
+  backends.push_back(Backend::LITE);
+#endif
+#ifdef ENABLE_RKNPU2_BACKEND
+  backends.push_back(Backend::RKNPU2);
+#endif
+#ifdef ENABLE_SOPHGO_BACKEND
+  backends.push_back(Backend::SOPHGOTPU);
+#endif
+  return backends;
+}
+
+bool IsBackendAvailable(const Backend& backend) {
+  std::vector<Backend> backends = GetAvailableBackends();
+  for (size_t i = 0; i < backends.size(); ++i) {
+    if (backend == backends[i]) {
+      return true;
+    }
+  }
+  return false;
+}
 }  // namespace fastdeploy
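Assuming these definitions are exposed through fastdeploy/runtime/enum_variables.h, where the header hunks below add the matching declarations, a caller linked against FastDeploy could use the two helpers roughly like this (a sketch, not code from the PR):

#include <iostream>

#include "fastdeploy/runtime/enum_variables.h"  // assumed home of the declarations

int main() {
  // List every backend compiled into this FastDeploy build.
  for (const auto& b : fastdeploy::GetAvailableBackends()) {
    std::cout << b << std::endl;  // operator<< for Backend is declared alongside
  }
  // Probe a single backend before configuring a RuntimeOption with it.
  if (!fastdeploy::IsBackendAvailable(fastdeploy::Backend::TRT)) {
    std::cout << "TensorRT backend is not compiled in" << std::endl;
  }
  return 0;
}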
@@ -25,6 +25,7 @@
 
 namespace fastdeploy {
 
+
 /*! Inference backend supported in FastDeploy */
 enum Backend {
   UNKNOWN,  ///< Unknown inference backend
@@ -38,6 +39,17 @@ enum Backend {
   SOPHGOTPU,  ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only
 };
 
+/**
+ * @brief Get all the available inference backend in FastDeploy
+ */
+FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
+
+/**
+ * @brief Check if the inference backend available
+ */
+FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
+
+
 enum FASTDEPLOY_DECL Device {
   CPU,
   GPU,
@@ -69,11 +81,8 @@ static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
     {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
 };
 
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Backend& b);
-
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d);
-
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
-                                         const ModelFormat& f);
+FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Backend& b);
+FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Device& d);
+FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const ModelFormat& f);
 
 }  // namespace fastdeploy
@@ -18,35 +18,35 @@
 #include "fastdeploy/utils/utils.h"
 
 #ifdef ENABLE_ORT_BACKEND
-#include "fastdeploy/backends/ort/ort_backend.h"
+#include "fastdeploy/runtime/backends/ort/ort_backend.h"
 #endif
 
 #ifdef ENABLE_TRT_BACKEND
-#include "fastdeploy/backends/tensorrt/trt_backend.h"
+#include "fastdeploy/runtime/backends/tensorrt/trt_backend.h"
 #endif
 
 #ifdef ENABLE_PADDLE_BACKEND
-#include "fastdeploy/backends/paddle/paddle_backend.h"
+#include "fastdeploy/runtime/backends/paddle/paddle_backend.h"
 #endif
 
 #ifdef ENABLE_POROS_BACKEND
-#include "fastdeploy/backends/poros/poros_backend.h"
+#include "fastdeploy/runtime/backends/poros/poros_backend.h"
 #endif
 
 #ifdef ENABLE_OPENVINO_BACKEND
-#include "fastdeploy/backends/openvino/ov_backend.h"
+#include "fastdeploy/runtime/backends/openvino/ov_backend.h"
 #endif
 
 #ifdef ENABLE_LITE_BACKEND
-#include "fastdeploy/backends/lite/lite_backend.h"
+#include "fastdeploy/runtime/backends/lite/lite_backend.h"
 #endif
 
 #ifdef ENABLE_RKNPU2_BACKEND
-#include "fastdeploy/backends/rknpu2/rknpu2_backend.h"
+#include "fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h"
 #endif
 
 #ifdef ENABLE_SOPHGO_BACKEND
-#include "fastdeploy/backends/sophgo/sophgo_backend.h"
+#include "fastdeploy/runtime/backends/sophgo/sophgo_backend.h"
 #endif
 
 namespace fastdeploy {
@@ -19,7 +19,7 @@
 */
 
 #pragma once
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
 #include "fastdeploy/core/fd_tensor.h"
 #include "fastdeploy/runtime/runtime_option.h"
 #include "fastdeploy/utils/perf.h"
@@ -18,127 +18,6 @@
 
 namespace fastdeploy {
 
-std::vector<Backend> GetAvailableBackends() {
-  std::vector<Backend> backends;
-#ifdef ENABLE_ORT_BACKEND
-  backends.push_back(Backend::ORT);
-#endif
-#ifdef ENABLE_TRT_BACKEND
-  backends.push_back(Backend::TRT);
-#endif
-#ifdef ENABLE_PADDLE_BACKEND
-  backends.push_back(Backend::PDINFER);
-#endif
-#ifdef ENABLE_POROS_BACKEND
-  backends.push_back(Backend::POROS);
-#endif
-#ifdef ENABLE_OPENVINO_BACKEND
-  backends.push_back(Backend::OPENVINO);
-#endif
-#ifdef ENABLE_LITE_BACKEND
-  backends.push_back(Backend::LITE);
-#endif
-#ifdef ENABLE_RKNPU2_BACKEND
-  backends.push_back(Backend::RKNPU2);
-#endif
-#ifdef ENABLE_SOPHGO_BACKEND
-  backends.push_back(Backend::SOPHGOTPU);
-#endif
-  return backends;
-}
-
-bool IsBackendAvailable(const Backend& backend) {
-  std::vector<Backend> backends = GetAvailableBackends();
-  for (size_t i = 0; i < backends.size(); ++i) {
-    if (backend == backends[i]) {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool CheckModelFormat(const std::string& model_file,
-                      const ModelFormat& model_format) {
-  if (model_format == ModelFormat::PADDLE) {
-    if (model_file.size() < 8 ||
-        model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
-      FDERROR << "With model format of ModelFormat::PADDLE, the model file "
-                 "should ends with `.pdmodel`, but now it's "
-              << model_file << std::endl;
-      return false;
-    }
-  } else if (model_format == ModelFormat::ONNX) {
-    if (model_file.size() < 5 ||
-        model_file.substr(model_file.size() - 5, 5) != ".onnx") {
-      FDERROR << "With model format of ModelFormat::ONNX, the model file "
-                 "should ends with `.onnx`, but now it's "
-              << model_file << std::endl;
-      return false;
-    }
-  } else if (model_format == ModelFormat::RKNN) {
-    if (model_file.size() < 5 ||
-        model_file.substr(model_file.size() - 5, 5) != ".rknn") {
-      FDERROR << "With model format of ModelFormat::RKNN, the model file "
-                 "should ends with `.rknn`, but now it's "
-              << model_file << std::endl;
-      return false;
-    }
-  } else if (model_format == ModelFormat::TORCHSCRIPT) {
-    if (model_file.size() < 3 ||
-        model_file.substr(model_file.size() - 3, 3) != ".pt") {
-      FDERROR
-          << "With model format of ModelFormat::TORCHSCRIPT, the model file "
-             "should ends with `.pt`, but now it's "
-          << model_file << std::endl;
-      return false;
-    }
-  } else if (model_format == ModelFormat::SOPHGO) {
-    if (model_file.size() < 7 ||
-        model_file.substr(model_file.size() - 7, 7) != ".bmodel") {
-      FDERROR << "With model format of ModelFormat::SOPHGO, the model file "
-                 "should ends with `.bmodel`, but now it's "
-              << model_file << std::endl;
-      return false;
-    }
-  } else {
-    FDERROR
-        << "Only support model format with frontend ModelFormat::PADDLE / "
-           "ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT."
-        << std::endl;
-    return false;
-  }
-  return true;
-}
-
-ModelFormat GuessModelFormat(const std::string& model_file) {
-  if (model_file.size() > 8 &&
-      model_file.substr(model_file.size() - 8, 8) == ".pdmodel") {
-    FDINFO << "Model Format: PaddlePaddle." << std::endl;
-    return ModelFormat::PADDLE;
-  } else if (model_file.size() > 5 &&
-             model_file.substr(model_file.size() - 5, 5) == ".onnx") {
-    FDINFO << "Model Format: ONNX." << std::endl;
-    return ModelFormat::ONNX;
-  } else if (model_file.size() > 3 &&
-             model_file.substr(model_file.size() - 3, 3) == ".pt") {
-    FDINFO << "Model Format: Torchscript." << std::endl;
-    return ModelFormat::TORCHSCRIPT;
-  } else if (model_file.size() > 5 &&
-             model_file.substr(model_file.size() - 5, 5) == ".rknn") {
-    FDINFO << "Model Format: RKNN." << std::endl;
-    return ModelFormat::RKNN;
-  } else if (model_file.size() > 7 &&
-             model_file.substr(model_file.size() - 7, 7) == ".bmodel") {
-    FDINFO << "Model Format: SOPHGO." << std::endl;
-    return ModelFormat::SOPHGO;
-  }
-
-  FDERROR << "Cannot guess which model format you are using, please set "
-             "RuntimeOption::model_format manually."
-          << std::endl;
-  return ModelFormat::PADDLE;
-}
-
 void RuntimeOption::SetModelPath(const std::string& model_path,
                                  const std::string& params_path,
                                  const ModelFormat& format) {
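All of the format checks removed above boil down to one suffix comparison per format. A self-contained illustration of that comparison (EndsWith is a hypothetical helper written for this sketch, not a FastDeploy API):

#include <iostream>
#include <string>

// Hypothetical helper mirroring the substr-based suffix checks above.
bool EndsWith(const std::string& s, const std::string& suffix) {
  return s.size() >= suffix.size() &&
         s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

int main() {
  std::cout << EndsWith("model.pdmodel", ".pdmodel") << "\n";  // prints 1
  std::cout << EndsWith("model.onnx", ".pdmodel") << "\n";     // prints 0
  return 0;
}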
@@ -24,31 +24,17 @@
 #include <map>
 #include <vector>
 #include "fastdeploy/runtime/enum_variables.h"
-#include "fastdeploy/backends/lite/option.h"
-#include "fastdeploy/backends/openvino/option.h"
-#include "fastdeploy/backends/ort/option.h"
-#include "fastdeploy/backends/paddle/option.h"
-#include "fastdeploy/backends/poros/option.h"
-#include "fastdeploy/backends/rknpu2/option.h"
-#include "fastdeploy/backends/sophgo/option.h"
-#include "fastdeploy/backends/tensorrt/option.h"
+#include "fastdeploy/runtime/backends/lite/option.h"
+#include "fastdeploy/runtime/backends/openvino/option.h"
+#include "fastdeploy/runtime/backends/ort/option.h"
+#include "fastdeploy/runtime/backends/paddle/option.h"
+#include "fastdeploy/runtime/backends/poros/option.h"
+#include "fastdeploy/runtime/backends/rknpu2/option.h"
+#include "fastdeploy/runtime/backends/sophgo/option.h"
+#include "fastdeploy/runtime/backends/tensorrt/option.h"
 
 namespace fastdeploy {
 
-/**
- * @brief Get all the available inference backend in FastDeploy
- */
-FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
-
-/**
- * @brief Check if the inference backend available
- */
-FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
-
 bool CheckModelFormat(const std::string& model_file,
                       const ModelFormat& model_format);
 ModelFormat GuessModelFormat(const std::string& model_file);
 
 /*! @brief Option object used when create a new Runtime object
 */
 struct FASTDEPLOY_DECL RuntimeOption {