[Other] [Part2] Upgrade runtime module (#1080)

Author: Jason
Date: 2023-01-09 13:22:51 +08:00 (committed via GitHub)
Parent commit: cbf88a46fa
Commit: 4aa4ebd7c3
53 changed files with 312 additions and 374 deletions
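The bulk of the diff below is a mechanical relocation of the backend sources and headers from fastdeploy/backends/ to fastdeploy/runtime/backends/, plus clang-format cleanups. A minimal sketch of what the move means for C++ code that includes these headers (the consuming file is hypothetical; both paths are taken from the diff):

    // Before this commit (old include path):
    // #include "fastdeploy/backends/ort/ort_backend.h"
    // After the runtime module upgrade (new include path):
    #include "fastdeploy/runtime/backends/ort/ort_backend.h"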


@@ -185,15 +185,15 @@ configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.
configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc)
file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc)
file(GLOB_RECURSE FDTENSOR_FUNC_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cu)
-file(GLOB_RECURSE DEPLOY_OP_CUDA_KERNEL_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/op_cuda_kernels/*.cu)
-file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc)
-file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/paddle/*.cc)
-file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
-file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
-file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
-file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu2/*.cc)
-file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/sophgo/*.cc)
-file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
+file(GLOB_RECURSE DEPLOY_OP_CUDA_KERNEL_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/op_cuda_kernels/*.cu)
+file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/ort/*.cc)
+file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/paddle/*.cc)
+file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/poros/*.cc)
+file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/*.cpp)
+file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/openvino/*.cc)
+file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/rknpu2/*.cc)
+file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/sophgo/*.cc)
+file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/lite/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_ENCRYPTION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/encryption/*.cc)
file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pipeline/*.cc)
@@ -289,7 +289,7 @@ if(ENABLE_POROS_BACKEND)
else ()
  message(STATUS "site-packages: ${Python3_SITELIB}")
endif ()
-include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/common)
+include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/poros/common)
# find trt
if(NOT WITH_GPU)
  message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must set -DWITH_GPU=ON, but now it's OFF")
@@ -377,7 +377,7 @@ if(ENABLE_TRT_BACKEND)
add_definitions(-DENABLE_TRT_BACKEND)
include_directories(${TRT_INC_DIR})
-include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/common)
+include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/common)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TRT_SRCS})
find_library(TRT_INFER_LIB nvinfer ${TRT_LIB_DIR} NO_DEFAULT_PATH)
find_library(TRT_ONNX_LIB nvonnxparser ${TRT_LIB_DIR} NO_DEFAULT_PATH)
@@ -574,7 +574,7 @@ install(
  DESTINATION ${CMAKE_INSTALL_PREFIX}/include
  FILES_MATCHING
  PATTERN "*.h"
-  PATTERN "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/*/*.h"
+  PATTERN "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/*/*.h"
)
install(
  DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install

fastdeploy/fastdeploy_model.cc (48) Executable file → Normal file

@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
@@ -42,8 +43,7 @@ bool IsSupported(const std::vector<Backend>& backends, Backend backend) {
bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
  if (!IsBackendAvailable(runtime_option.backend)) {
-    FDERROR << runtime_option.backend
-            << " is not compiled with current FastDeploy library."
-            << std::endl;
+    FDERROR << runtime_option.backend
+            << " is not compiled with current FastDeploy library." << std::endl;
    return false;
  }
@@ -57,42 +57,58 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
  if (use_gpu) {
    if (!IsSupported(valid_gpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid gpu backends of model " << ModelName() << " are " << Str(valid_gpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid gpu backends of model " << ModelName() << " are "
+              << Str(valid_gpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_rknpu) {
    if (!IsSupported(valid_rknpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid rknpu backends of model " << ModelName() << " are " << Str(valid_rknpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid rknpu backends of model " << ModelName() << " are "
+              << Str(valid_rknpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_sophgotpu) {
    if (!IsSupported(valid_sophgonpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid rknpu backends of model " << ModelName() << " are " << Str(valid_rknpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid rknpu backends of model " << ModelName() << " are "
+              << Str(valid_rknpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_timvx) {
    if (!IsSupported(valid_timvx_backends, runtime_option.backend)) {
-      FDERROR << "The valid timvx backends of model " << ModelName() << " are " << Str(valid_timvx_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid timvx backends of model " << ModelName() << " are "
+              << Str(valid_timvx_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_ascend) {
    if (!IsSupported(valid_ascend_backends, runtime_option.backend)) {
-      FDERROR << "The valid ascend backends of model " << ModelName() << " are " << Str(valid_ascend_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid ascend backends of model " << ModelName() << " are "
+              << Str(valid_ascend_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else if (use_kunlunxin) {
    if (!IsSupported(valid_kunlunxin_backends, runtime_option.backend)) {
-      FDERROR << "The valid kunlunxin backends of model " << ModelName() << " are " << Str(valid_kunlunxin_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid kunlunxin backends of model " << ModelName()
+              << " are " << Str(valid_kunlunxin_backends) << ", "
+              << runtime_option.backend << " is not supported." << std::endl;
      return false;
    }
-  } else if(use_ipu) {
+  } else if (use_ipu) {
    if (!IsSupported(valid_ipu_backends, runtime_option.backend)) {
-      FDERROR << "The valid ipu backends of model " << ModelName() << " are " << Str(valid_ipu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid ipu backends of model " << ModelName() << " are "
+              << Str(valid_ipu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  } else {
    if (!IsSupported(valid_cpu_backends, runtime_option.backend)) {
-      FDERROR << "The valid cpu backends of model " << ModelName() << " are " << Str(valid_cpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      FDERROR << "The valid cpu backends of model " << ModelName() << " are "
+              << Str(valid_cpu_backends) << ", " << runtime_option.backend
+              << " is not supported." << std::endl;
      return false;
    }
  }
@@ -135,16 +151,12 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
    return false;
#endif
  }
-  FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND now." << std::endl;
+  FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND now."
+          << std::endl;
  return false;
}
bool FastDeployModel::InitRuntime() {
-  if (!runtime_option.model_from_memory_) {
-    FDASSERT(
-        CheckModelFormat(runtime_option.model_file, runtime_option.model_format),
-        "ModelFormatCheck Failed.");
-  }
  if (runtime_initialized_) {
    FDERROR << "The model is already initialized, cannot be initliazed again."
            << std::endl;
@@ -298,7 +310,6 @@ bool FastDeployModel::CreateKunlunXinBackend() {
  return false;
}
bool FastDeployModel::CreateASCENDBackend() {
  if (valid_ascend_backends.size() == 0) {
    FDERROR << "There's no valid ascend backends for model: " << ModelName()
@@ -322,7 +333,6 @@ bool FastDeployModel::CreateASCENDBackend() {
  return false;
}
bool FastDeployModel::CreateIpuBackend() {
  if (valid_ipu_backends.size() == 0) {
    FDERROR << "There's no valid ipu backends for model: " << ModelName()


@@ -11,8 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/rknpu2/option.h"
#include "fastdeploy/pybind/main.h"
+#include "fastdeploy/runtime/backends/rknpu2/option.h"
namespace fastdeploy {
void BindRKNPU2Config(pybind11::module& m) {
  pybind11::enum_<fastdeploy::rknpu2::CpuName>(


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/lite/lite_backend.h"
+#include "fastdeploy/runtime/backends/lite/lite_backend.h"
#include <cstring>
@@ -43,7 +43,7 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
  option_ = option;
  std::vector<paddle::lite_api::Place> valid_places;
  if (option_.enable_int8) {
-    if(option_.enable_kunlunxin) {
+    if (option_.enable_kunlunxin) {
      valid_places.push_back(
          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
    } else {
@@ -54,7 +54,7 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
              << "inference with int8 precision!" << std::endl;
  }
  if (option_.enable_fp16) {
-    if(option_.enable_kunlunxin){
+    if (option_.enable_kunlunxin) {
      valid_places.push_back(
          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
    } else {
@@ -66,7 +66,9 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
      if (supported_fp16_) {
        valid_places.push_back(
            paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
-        FDINFO << "The device supports FP16, Lite::Backend will inference with FP16 precision." << std::endl;
+        FDINFO << "The device supports FP16, Lite::Backend will inference with "
+                  "FP16 precision."
+               << std::endl;
      } else {
        FDWARNING << "The device doesn't support FP16, will fallback to FP32.";
      }
@@ -74,14 +76,18 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
  }
  if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
    std::vector<char> nnadapter_subgraph_partition_config_buffer;
-    if (ReadFile(option_.nnadapter_subgraph_partition_config_path, &nnadapter_subgraph_partition_config_buffer, false)) {
+    if (ReadFile(option_.nnadapter_subgraph_partition_config_path,
+                 &nnadapter_subgraph_partition_config_buffer, false)) {
      if (!nnadapter_subgraph_partition_config_buffer.empty()) {
-        std::string nnadapter_subgraph_partition_config_string(nnadapter_subgraph_partition_config_buffer.data(), nnadapter_subgraph_partition_config_buffer.size());
-        config_.set_nnadapter_subgraph_partition_config_buffer(nnadapter_subgraph_partition_config_string);
+        std::string nnadapter_subgraph_partition_config_string(
+            nnadapter_subgraph_partition_config_buffer.data(),
+            nnadapter_subgraph_partition_config_buffer.size());
+        config_.set_nnadapter_subgraph_partition_config_buffer(
+            nnadapter_subgraph_partition_config_string);
      }
    }
  }
-  if(option_.enable_timvx) {
+  if (option_.enable_timvx) {
    config_.set_nnadapter_device_names({"verisilicon_timvx"});
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
@@ -91,32 +97,30 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
  }
-  if(option_.enable_ascend){
-    if(option_.nnadapter_device_names.empty()){
+  if (option_.enable_ascend) {
+    if (option_.nnadapter_device_names.empty()) {
      config_.set_nnadapter_device_names({"huawei_ascend_npu"});
    } else {
      config_.set_nnadapter_device_names(option_.nnadapter_device_names);
    }
-    if(!option_.nnadapter_context_properties.empty()){
-      config_.set_nnadapter_context_properties(option_.nnadapter_context_properties);
+    if (!option_.nnadapter_context_properties.empty()) {
+      config_.set_nnadapter_context_properties(
+          option_.nnadapter_context_properties);
    }
-    if(!option_.nnadapter_model_cache_dir.empty()){
+    if (!option_.nnadapter_model_cache_dir.empty()) {
      config_.set_nnadapter_model_cache_dir(option_.nnadapter_model_cache_dir);
    }
-    if(!option_.nnadapter_mixed_precision_quantization_config_path.empty()){
-      config_.set_nnadapter_mixed_precision_quantization_config_path(
-        option_.nnadapter_mixed_precision_quantization_config_path
-      );
+    if (!option_.nnadapter_mixed_precision_quantization_config_path.empty()) {
+      config_.set_nnadapter_mixed_precision_quantization_config_path(
+          option_.nnadapter_mixed_precision_quantization_config_path);
    }
-    if(!option_.nnadapter_subgraph_partition_config_path.empty()){
-      config_.set_nnadapter_subgraph_partition_config_path(
-        option_.nnadapter_subgraph_partition_config_path
-      );
+    if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
+      config_.set_nnadapter_subgraph_partition_config_path(
+          option_.nnadapter_subgraph_partition_config_path);
    }
    valid_places.push_back(
@@ -127,16 +131,20 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
  }
-  if(option_.enable_kunlunxin){
+  if (option_.enable_kunlunxin) {
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)});
    config_.set_xpu_dev_per_thread(option_.device_id);
-    config_.set_xpu_workspace_l3_size_per_thread(option_.kunlunxin_l3_workspace_size);
-    config_.set_xpu_l3_cache_method(option_.kunlunxin_l3_workspace_size, option_.kunlunxin_locked);
-    config_.set_xpu_conv_autotune(option_.kunlunxin_autotune, option_.kunlunxin_autotune_file);
-    config_.set_xpu_multi_encoder_method(option_.kunlunxin_precision, option_.kunlunxin_adaptive_seqlen);
+    config_.set_xpu_workspace_l3_size_per_thread(
+        option_.kunlunxin_l3_workspace_size);
+    config_.set_xpu_l3_cache_method(option_.kunlunxin_l3_workspace_size,
+                                    option_.kunlunxin_locked);
+    config_.set_xpu_conv_autotune(option_.kunlunxin_autotune,
+                                  option_.kunlunxin_autotune_file);
+    config_.set_xpu_multi_encoder_method(option_.kunlunxin_precision,
+                                         option_.kunlunxin_adaptive_seqlen);
    if (option_.kunlunxin_enable_multi_stream) {
      config_.enable_xpu_multi_stream();
    }
@@ -155,10 +163,9 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
}
bool LiteBackend::ReadFile(const std::string& filename,
-                           std::vector<char>* contents,
-                           const bool binary) {
-  FILE *fp = fopen(filename.c_str(), binary ? "rb" : "r");
-  if (!fp){
+                           std::vector<char>* contents, const bool binary) {
+  FILE* fp = fopen(filename.c_str(), binary ? "rb" : "r");
+  if (!fp) {
    FDERROR << "Cannot open file " << filename << "." << std::endl;
    return false;
  }
@@ -168,7 +175,7 @@ bool LiteBackend::ReadFile(const std::string& filename,
  contents->clear();
  contents->resize(size);
  size_t offset = 0;
-  char *ptr = reinterpret_cast<char *>(&(contents->at(0)));
+  char* ptr = reinterpret_cast<char*>(&(contents->at(0)));
  while (offset < size) {
    size_t already_read = fread(ptr, 1, size - offset, fp);
    offset += already_read;
@@ -196,7 +203,8 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
  if (option_.optimized_model_dir != "") {
    FDINFO << "Optimzed model dir is not empty, will save optimized model to: "
           << option_.optimized_model_dir << std::endl;
-    predictor_->SaveOptimizedModel(option_.optimized_model_dir,
-                                   paddle::lite_api::LiteModelType::kNaiveBuffer);
+    predictor_->SaveOptimizedModel(
+        option_.optimized_model_dir,
+        paddle::lite_api::LiteModelType::kNaiveBuffer);
  }
@@ -221,7 +229,7 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
    auto shape = tensor->shape();
    info.shape.assign(shape.begin(), shape.end());
    info.name = output_names[i];
-    if(!option_.enable_kunlunxin){
+    if (!option_.enable_kunlunxin) {
      info.dtype = LiteDataTypeToFD(tensor->precision());
    }
    outputs_desc_.emplace_back(info);
@@ -250,8 +258,7 @@ TensorInfo LiteBackend::GetOutputInfo(int index) {
std::vector<TensorInfo> LiteBackend::GetOutputInfos() { return outputs_desc_; }
bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
-                        std::vector<FDTensor>* outputs,
-                        bool copy_to_fd) {
+                        std::vector<FDTensor>* outputs, bool copy_to_fd) {
  if (inputs.size() != inputs_desc_.size()) {
    FDERROR << "[LiteBackend] Size of inputs(" << inputs.size()
            << ") should keep same with the inputs of this model("
@@ -270,25 +277,25 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
      tensor->Resize(inputs[i].shape);
      if (inputs[i].dtype == FDDataType::FP32) {
        tensor->CopyFromCpu<float, paddle::lite_api::TargetType::kHost>(
-            reinterpret_cast<const float*>(const_cast<void*>(
-                inputs[i].CpuData())));
+            reinterpret_cast<const float*>(
+                const_cast<void*>(inputs[i].CpuData())));
      } else if (inputs[i].dtype == FDDataType::INT32) {
        tensor->CopyFromCpu<int, paddle::lite_api::TargetType::kHost>(
-            reinterpret_cast<const int*>(const_cast<void*>(
-                inputs[i].CpuData())));
+            reinterpret_cast<const int*>(const_cast<void*>(inputs[i].CpuData())));
      } else if (inputs[i].dtype == FDDataType::INT8) {
        tensor->CopyFromCpu<int8_t, paddle::lite_api::TargetType::kHost>(
-            reinterpret_cast<const int8_t*>(const_cast<void*>(
-                inputs[i].CpuData())));
+            reinterpret_cast<const int8_t*>(
+                const_cast<void*>(inputs[i].CpuData())));
      } else if (inputs[i].dtype == FDDataType::UINT8) {
        tensor->CopyFromCpu<uint8_t, paddle::lite_api::TargetType::kHost>(
-            reinterpret_cast<const uint8_t*>(const_cast<void*>(
-                inputs[i].CpuData())));
+            reinterpret_cast<const uint8_t*>(
+                const_cast<void*>(inputs[i].CpuData())));
      } else if (inputs[i].dtype == FDDataType::INT64) {
-#if (defined(__aarch64__) || defined(__x86_64__) || defined(_M_X64) || defined(_M_ARM64))
+#if (defined(__aarch64__) || defined(__x86_64__) || defined(_M_X64) || \
+     defined(_M_ARM64))
        tensor->CopyFromCpu<int64_t, paddle::lite_api::TargetType::kHost>(
-            reinterpret_cast<const int64_t*>(const_cast<void*>(
-                inputs[i].CpuData())));
+            reinterpret_cast<const int64_t*>(
+                const_cast<void*>(inputs[i].CpuData())));
#else
        FDASSERT(false, "FDDataType::INT64 is not support for x86/armv7 now!");
#endif
@@ -302,7 +309,7 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
  outputs->resize(outputs_desc_.size());
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
    auto tensor = predictor_->GetOutput(i);
-    if(outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())){
+    if (outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())) {
      outputs_desc_[i].dtype = LiteDataTypeToFD(tensor->precision());
    }
    (*outputs)[i].Resize(tensor->shape(), outputs_desc_[i].dtype,


@@ -19,8 +19,8 @@
#include <string>
#include <vector>
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/lite/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/lite/option.h"
#include "paddle_api.h"  // NOLINT
namespace fastdeploy {


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/openvino/ov_backend.h"
+#include "fastdeploy/runtime/backends/openvino/ov_backend.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif


@@ -19,9 +19,9 @@
#include <string>
#include <vector>
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/utils/unique_ptr.h"
-#include "fastdeploy/backends/openvino/option.h"
+#include "fastdeploy/runtime/backends/openvino/option.h"
#include "openvino/openvino.hpp"
namespace fastdeploy {


@@ -104,8 +104,8 @@ void AdaptivePool2dKernel::GetAttribute(const OrtKernelInfo* info) {
      ort_.KernelInfoGetAttribute<std::string>(info, "pooling_type");
  output_size_ =
      ort_.KernelInfoGetAttribute<std::vector<int64_t>>(info, "output_size");
-  FDASSERT(output_size_.size() == 4 && output_size_[2] > 0 &&
-               output_size_[3] > 0,
-           "The output size of adaptive pool must be positive.");
+  FDASSERT(
+      output_size_.size() == 4 && output_size_[2] > 0 && output_size_[3] > 0,
+      "The output size of adaptive pool must be positive.");
}
}  // namespace fastdeploy


@@ -25,7 +25,7 @@
#include "onnxruntime_cxx_api.h" // NOLINT #include "onnxruntime_cxx_api.h" // NOLINT
#ifdef WITH_GPU #ifdef WITH_GPU
#include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h" #include "fastdeploy/runtime/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
#endif #endif
namespace fastdeploy { namespace fastdeploy {


@@ -14,10 +14,12 @@
#ifndef NON_64_PLATFORM
-#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
+#include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h"
+#include <algorithm>
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/utils/utils.h"
-#include <algorithm>
namespace fastdeploy {


@@ -12,14 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/ort/ort_backend.h"
+#include "fastdeploy/runtime/backends/ort/ort_backend.h"
#include <memory>
-#include "fastdeploy/backends/ort/ops/adaptive_pool2d.h"
-#include "fastdeploy/backends/ort/ops/multiclass_nms.h"
-#include "fastdeploy/backends/ort/utils.h"
#include "fastdeploy/core/float16.h"
+#include "fastdeploy/runtime/backends/ort/ops/adaptive_pool2d.h"
+#include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h"
+#include "fastdeploy/runtime/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"


@@ -20,8 +20,8 @@
#include <vector>
#include <map>
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/ort/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/ort/option.h"
#include "onnxruntime_cxx_api.h"  // NOLINT
namespace fastdeploy {


@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/ort/utils.h"
+#include "fastdeploy/runtime/backends/ort/utils.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {


@@ -19,7 +19,7 @@
#include <string>
#include <vector>
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
#include "onnxruntime_cxx_api.h"  // NOLINT
namespace fastdeploy {


@@ -19,7 +19,7 @@
#include <memory>
#include <string>
#include <vector>
-#include "fastdeploy/backends/tensorrt/option.h"
+#include "fastdeploy/runtime/backends/tensorrt/option.h"
namespace fastdeploy {


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/paddle/paddle_backend.h"
+#include "fastdeploy/runtime/backends/paddle/paddle_backend.h"
#include <sstream>


@@ -19,8 +19,8 @@
#include <string>
#include <vector>
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/paddle/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/paddle/option.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
#endif


@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/paddle/paddle_backend.h"
#include "fastdeploy/core/float16.h"
+#include "fastdeploy/runtime/backends/paddle/paddle_backend.h"
namespace fastdeploy {
paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device) {


@@ -19,9 +19,9 @@
#include <string>
#include <unordered_map>
-#include "iengine.h"
-#include "poros_module.h"
-#include "torch/script.h"
+#include "iengine.h"       // NOLINT
+#include "poros_module.h"  // NOLINT
+#include "torch/script.h"  // NOLINT
namespace baidu {
namespace mirana {


@@ -16,12 +16,12 @@
#include <string>
-//from pytorch
-#include "ATen/core/interned_strings.h"
-#include "torch/csrc/jit/ir/ir.h"
-#include "torch/script.h"
-#include "plugin_create.h"
+// from pytorch
+#include "ATen/core/interned_strings.h"  // NOLINT
+#include "torch/csrc/jit/ir/ir.h"        // NOLINT
+#include "torch/script.h"                // NOLINT
+#include "plugin_create.h"               // NOLINT
namespace baidu {
namespace mirana {


@@ -36,7 +36,7 @@ IPlugin* create_plugin(const std::string& plugin_name,
void create_all_plugins(const plugin_creator_map_t& plugin_creator_map,
                        std::unordered_map<std::string, IPlugin*>& plugin_m);
-//void create_all_plugins(std::unordered_map<std::string, IPlugin*>& plugin_m);
+// void create_all_plugins(std::unordered_map<std::string, IPlugin*>& plugin_m);
template <typename PluginType> IPlugin* default_plugin_creator() {
  return new (std::nothrow) PluginType;


@@ -14,8 +14,8 @@
#pragma once
-#include "torch/csrc/jit/jit_log.h"
-#include "torch/script.h"
+#include "torch/csrc/jit/jit_log.h"  // NOLINT
+#include "torch/script.h"            // NOLINT
#include <string>
// #include "ATen/Context.h"
@@ -37,20 +37,21 @@ struct PorosOptions {
  bool use_nvidia_tf32 = false;
};
class PorosModule : public torch::jit::Module {
 public:
-  PorosModule(torch::jit::Module module) : torch::jit::Module(module) {}
+  PorosModule(torch::jit::Module module) : torch::jit::Module(module) {}  // NOLINT
  ~PorosModule() = default;
  void to_device(Device device) { _options.device = device; }
-  //c10::IValue forward(std::vector<c10::IValue> inputs);
-  //void save(const std::string& filename);
+  // c10::IValue forward(std::vector<c10::IValue> inputs);
+  // void save(const std::string& filename);
 public:
  PorosOptions _options;
};
-//via porosmodule.save
+// via porosmodule.save
std::unique_ptr<PorosModule> Load(const std::string& filename,
                                  const PorosOptions& options);


@@ -12,7 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/poros/poros_backend.h"
+#include "fastdeploy/runtime/backends/poros/poros_backend.h"
#include <sys/time.h>
namespace fastdeploy {


@@ -19,10 +19,10 @@
#include <string>
#include <vector>
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/poros/option.h"
-#include "fastdeploy/backends/poros/common/compile.h"
-#include "fastdeploy/backends/poros/common/poros_module.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/poros/option.h"
+#include "fastdeploy/runtime/backends/poros/common/compile.h"
+#include "fastdeploy/runtime/backends/poros/common/poros_module.h"
namespace fastdeploy {


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/poros/poros_backend.h"
+#include "fastdeploy/runtime/backends/poros/poros_backend.h"
#ifdef WITH_GPU
#include <cuda_runtime_api.h>
@@ -129,7 +129,8 @@ at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda) {
                  numel * sizeof(double));
    }
  } else {
-    FDASSERT(false, "Unrecognized data type while calling "
-                    "PorosBackend::CreatePorosValue().");
+    FDASSERT(false,
+             "Unrecognized data type while calling "
+             "PorosBackend::CreatePorosValue().");
  }
  return poros_value;


@@ -11,7 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/rknpu2/rknpu2_backend.h"
+#include "fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h"
#include "fastdeploy/utils/perf.h"
namespace fastdeploy {
RKNPU2Backend::~RKNPU2Backend() {
@@ -76,9 +77,8 @@ void RKNPU2Backend::BuildOption(const RKNPU2BackendOption& option) {
/***************************************************************
 * @name InitFromRKNN
 * @brief Initialize RKNN model
- * @param model_file: Binary data for the RKNN model or the path of RKNN model.
- *        params_file: None
- *        option: config
+ * @param model_file: Binary data for the RKNN model or the path of RKNN
+ *model. params_file: None option: config
 * @return bool
 * @note None
 ***************************************************************/
@@ -232,8 +232,8 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
      return false;
    }
-    // If the output dimension is 3, the runtime will automatically change it to 4.
-    // Obviously, this is wrong, and manual correction is required here.
+    // If the output dimension is 3, the runtime will automatically change it
+    // to 4. Obviously, this is wrong, and manual correction is required here.
    int n_dims = output_attrs_[i].n_dims;
    if ((n_dims == 4) && (output_attrs_[i].dims[3] == 1)) {
      n_dims--;
@@ -263,7 +263,8 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
 * @note None
 ***************************************************************/
void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
-  printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
+  printf(
+      "index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
      "n_elems=%d, size=%d, fmt=%s, type=%s, "
      "qnt_type=%s, zp=%d, scale=%f, pass_through=%d\n",
      attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
@@ -357,7 +358,8 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
    // The data type of output data is changed to FP32
    output_attrs_[i].type = RKNN_TENSOR_FLOAT32;
-    // default output type is depend on model, this requires float32 to compute top5
+    // default output type is depend on model, this requires float32 to
+    // compute top5
    ret = rknn_set_io_mem(ctx, output_mems_[i], &output_attrs_[i]);
    // set output memory and attribute
@@ -452,8 +454,8 @@ FDDataType RKNPU2Backend::RknnTensorTypeToFDDataType(rknn_tensor_type type) {
 * @return None
 * @note None
 ***************************************************************/
-rknn_tensor_type
-RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) {
+rknn_tensor_type RKNPU2Backend::FDDataTypeToRknnTensorType(
+    fastdeploy::FDDataType type) {
  if (type == FDDataType::FP16) {
    return rknn_tensor_type::RKNN_TENSOR_FLOAT16;
  }


@@ -13,8 +13,8 @@
// limitations under the License.
#pragma once
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/rknpu2/option.h"
+#include "fastdeploy/runtime/backends/backend.h"
+#include "fastdeploy/runtime/backends/rknpu2/option.h"
#include "fastdeploy/core/fd_tensor.h"
#include "rknn_api.h"  // NOLINT
#include <cstring>


@@ -11,7 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/sophgo/sophgo_backend.h"
+#include "fastdeploy/runtime/backends/sophgo/sophgo_backend.h"
#include <assert.h>


@@ -13,11 +13,11 @@
// limitations under the License.
#pragma once
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "bmruntime_interface.h"  // NOLINT
#include "bmlib_runtime.h"  // NOLINT
-#include "fastdeploy/backends/sophgo/option.h"
+#include "fastdeploy/runtime/backends/sophgo/option.h"
#include <cstring>
#include <iostream>
#include <memory>


@@ -97,9 +97,8 @@ void AdaptivePool2d::serialize(void* buffer) const noexcept {
  FDASSERT(d == a + getSerializationSize(), "d == a + getSerializationSize()");
}
-nvinfer1::DataType
-AdaptivePool2d::getOutputDataType(int index,
-                                  const nvinfer1::DataType* inputType,
-                                  int nbInputs) const noexcept {
+nvinfer1::DataType AdaptivePool2d::getOutputDataType(
+    int index, const nvinfer1::DataType* inputType,
+    int nbInputs) const noexcept {
  return inputType[0];
}


@@ -14,7 +14,7 @@
#pragma once
#include "common.h"  // NOLINT
-#include "fastdeploy/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
+#include "fastdeploy/runtime/backends/op_cuda_kernels/adaptive_pool2d_kernel.h"
namespace fastdeploy {


@@ -12,13 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/tensorrt/trt_backend.h"
-#include "fastdeploy/function/cuda_cast.h"
+#include "fastdeploy/runtime/backends/tensorrt/trt_backend.h"
#include <cstring>
#include <unordered_map>
#include "NvInferRuntime.h"
+#include "fastdeploy/function/cuda_cast.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_PADDLE_FRONTEND
#include "paddle2onnx/converter.h"
@@ -215,9 +215,9 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
  outputs_desc_.resize(onnx_reader.num_outputs);
  for (int i = 0; i < onnx_reader.num_inputs; ++i) {
    std::string name(onnx_reader.inputs[i].name);
-    std::vector<int64_t> shape(onnx_reader.inputs[i].shape,
-                               onnx_reader.inputs[i].shape +
-                                   onnx_reader.inputs[i].rank);
+    std::vector<int64_t> shape(
+        onnx_reader.inputs[i].shape,
+        onnx_reader.inputs[i].shape + onnx_reader.inputs[i].rank);
    inputs_desc_[i].name = name;
    inputs_desc_[i].shape.assign(shape.begin(), shape.end());
    inputs_desc_[i].dtype = ReaderDtypeToTrtDtype(onnx_reader.inputs[i].dtype);
@@ -238,9 +238,9 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
  for (int i = 0; i < onnx_reader.num_outputs; ++i) {
    std::string name(onnx_reader.outputs[i].name);
-    std::vector<int64_t> shape(onnx_reader.outputs[i].shape,
-                               onnx_reader.outputs[i].shape +
-                                   onnx_reader.outputs[i].rank);
+    std::vector<int64_t> shape(
+        onnx_reader.outputs[i].shape,
+        onnx_reader.outputs[i].shape + onnx_reader.outputs[i].rank);
    outputs_desc_[i].name = name;
    outputs_desc_[i].shape.assign(shape.begin(), shape.end());
    outputs_desc_[i].dtype =
@@ -313,8 +313,8 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
    return false;
  }
  for (size_t i = 0; i < outputs->size(); ++i) {
-    // if the final output tensor's dtype is different from the model output tensor's dtype,
-    // then we need cast the data to the final output's dtype
+    // if the final output tensor's dtype is different from the model output
+    // tensor's dtype, then we need cast the data to the final output's dtype
    auto model_output_dtype =
        GetFDDataType(outputs_device_buffer_[(*outputs)[i].name].dtype());
    if ((*outputs)[i].dtype != model_output_dtype) {
@@ -369,7 +369,8 @@ void TrtBackend::GetInputOutputInfo() {
        outputs_desc_[i].original_dtype;
  }
-  // Re-read the tensor infos from TRT model and write into inputs_desc_ and outputs_desc_
+  // Re-read the tensor infos from TRT model and write into inputs_desc_ and
+  // outputs_desc_
  std::vector<TrtValueInfo>().swap(inputs_desc_);
  std::vector<TrtValueInfo>().swap(outputs_desc_);
  inputs_desc_.clear();


@@ -23,9 +23,9 @@
#include "NvInfer.h" #include "NvInfer.h"
#include "NvOnnxParser.h" #include "NvOnnxParser.h"
#include "fastdeploy/backends/backend.h" #include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/backends/tensorrt/utils.h" #include "fastdeploy/runtime/backends/tensorrt/utils.h"
#include "fastdeploy/backends/tensorrt/option.h" #include "fastdeploy/runtime/backends/tensorrt/option.h"
#include "fastdeploy/utils/unique_ptr.h" #include "fastdeploy/utils/unique_ptr.h"
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 { class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {


@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/tensorrt/utils.h"
+#include "fastdeploy/runtime/backends/tensorrt/utils.h"
namespace fastdeploy {


@@ -82,4 +82,43 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
out << "UNKNOWN-ModelFormat"; out << "UNKNOWN-ModelFormat";
return out; return out;
} }
std::vector<Backend> GetAvailableBackends() {
std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
backends.push_back(Backend::TRT);
#endif
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
#endif
#ifdef ENABLE_POROS_BACKEND
backends.push_back(Backend::POROS);
#endif
#ifdef ENABLE_OPENVINO_BACKEND
backends.push_back(Backend::OPENVINO);
#endif
#ifdef ENABLE_LITE_BACKEND
backends.push_back(Backend::LITE);
#endif
#ifdef ENABLE_RKNPU2_BACKEND
backends.push_back(Backend::RKNPU2);
#endif
#ifdef ENABLE_SOPHGO_BACKEND
backends.push_back(Backend::SOPHGOTPU);
#endif
return backends;
}
bool IsBackendAvailable(const Backend& backend) {
std::vector<Backend> backends = GetAvailableBackends();
for (size_t i = 0; i < backends.size(); ++i) {
if (backend == backends[i]) {
return true;
}
}
return false;
}
}  // namespace fastdeploy


@@ -25,6 +25,7 @@
namespace fastdeploy {
/*! Inference backend supported in FastDeploy */
enum Backend {
  UNKNOWN,  ///< Unknown inference backend
@@ -38,6 +39,17 @@ enum Backend {
  SOPHGOTPU,  ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only
};
/**
* @brief Get all the available inference backend in FastDeploy
*/
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
/**
* @brief Check if the inference backend available
*/
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
enum FASTDEPLOY_DECL Device {
  CPU,
  GPU,
@@ -69,11 +81,8 @@ static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
    {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
};
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Backend& b);
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d);
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
-                                         const ModelFormat& f);
+FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Backend& b);
+FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const Device& d);
+FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& o, const ModelFormat& f);
}  // namespace fastdeploy
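Since GetAvailableBackends() and IsBackendAvailable() now ship alongside the Backend enum in fastdeploy/runtime/enum_variables.h, a caller can probe the compiled-in backends directly. A minimal usage sketch (illustrative only, not part of this commit):

    #include <iostream>
    #include "fastdeploy/runtime/enum_variables.h"

    int main() {
      // Enumerate every backend this FastDeploy build was compiled with.
      for (const auto& backend : fastdeploy::GetAvailableBackends()) {
        std::cout << "available backend: " << backend << std::endl;
      }
      // Guard optional code paths on a single backend.
      if (fastdeploy::IsBackendAvailable(fastdeploy::Backend::TRT)) {
        std::cout << "TensorRT backend is compiled in." << std::endl;
      }
      return 0;
    }

Printing a Backend value relies on the operator<< overload declared in the same header.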


@@ -18,35 +18,35 @@
#include "fastdeploy/utils/utils.h" #include "fastdeploy/utils/utils.h"
#ifdef ENABLE_ORT_BACKEND #ifdef ENABLE_ORT_BACKEND
#include "fastdeploy/backends/ort/ort_backend.h" #include "fastdeploy/runtime/backends/ort/ort_backend.h"
#endif #endif
#ifdef ENABLE_TRT_BACKEND #ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h" #include "fastdeploy/runtime/backends/tensorrt/trt_backend.h"
#endif #endif
#ifdef ENABLE_PADDLE_BACKEND #ifdef ENABLE_PADDLE_BACKEND
#include "fastdeploy/backends/paddle/paddle_backend.h" #include "fastdeploy/runtime/backends/paddle/paddle_backend.h"
#endif #endif
#ifdef ENABLE_POROS_BACKEND #ifdef ENABLE_POROS_BACKEND
#include "fastdeploy/backends/poros/poros_backend.h" #include "fastdeploy/runtime/backends/poros/poros_backend.h"
#endif #endif
#ifdef ENABLE_OPENVINO_BACKEND #ifdef ENABLE_OPENVINO_BACKEND
#include "fastdeploy/backends/openvino/ov_backend.h" #include "fastdeploy/runtime/backends/openvino/ov_backend.h"
#endif #endif
#ifdef ENABLE_LITE_BACKEND #ifdef ENABLE_LITE_BACKEND
#include "fastdeploy/backends/lite/lite_backend.h" #include "fastdeploy/runtime/backends/lite/lite_backend.h"
#endif #endif
#ifdef ENABLE_RKNPU2_BACKEND #ifdef ENABLE_RKNPU2_BACKEND
#include "fastdeploy/backends/rknpu2/rknpu2_backend.h" #include "fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h"
#endif #endif
#ifdef ENABLE_SOPHGO_BACKEND #ifdef ENABLE_SOPHGO_BACKEND
#include "fastdeploy/backends/sophgo/sophgo_backend.h" #include "fastdeploy/runtime/backends/sophgo/sophgo_backend.h"
#endif #endif
namespace fastdeploy { namespace fastdeploy {


@@ -19,7 +19,7 @@
 */
#pragma once
-#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/runtime/runtime_option.h"
#include "fastdeploy/utils/perf.h"


@@ -18,127 +18,6 @@
namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() {
std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
backends.push_back(Backend::TRT);
#endif
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
#endif
#ifdef ENABLE_POROS_BACKEND
backends.push_back(Backend::POROS);
#endif
#ifdef ENABLE_OPENVINO_BACKEND
backends.push_back(Backend::OPENVINO);
#endif
#ifdef ENABLE_LITE_BACKEND
backends.push_back(Backend::LITE);
#endif
#ifdef ENABLE_RKNPU2_BACKEND
backends.push_back(Backend::RKNPU2);
#endif
#ifdef ENABLE_SOPHGO_BACKEND
backends.push_back(Backend::SOPHGOTPU);
#endif
return backends;
}
bool IsBackendAvailable(const Backend& backend) {
std::vector<Backend> backends = GetAvailableBackends();
for (size_t i = 0; i < backends.size(); ++i) {
if (backend == backends[i]) {
return true;
}
}
return false;
}
bool CheckModelFormat(const std::string& model_file,
const ModelFormat& model_format) {
if (model_format == ModelFormat::PADDLE) {
if (model_file.size() < 8 ||
model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
FDERROR << "With model format of ModelFormat::PADDLE, the model file "
"should ends with `.pdmodel`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::ONNX) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".onnx") {
FDERROR << "With model format of ModelFormat::ONNX, the model file "
"should ends with `.onnx`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::RKNN) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".rknn") {
FDERROR << "With model format of ModelFormat::RKNN, the model file "
"should ends with `.rknn`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::TORCHSCRIPT) {
if (model_file.size() < 3 ||
model_file.substr(model_file.size() - 3, 3) != ".pt") {
FDERROR
<< "With model format of ModelFormat::TORCHSCRIPT, the model file "
"should ends with `.pt`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::SOPHGO) {
if (model_file.size() < 7 ||
model_file.substr(model_file.size() - 7, 7) != ".bmodel") {
FDERROR << "With model format of ModelFormat::SOPHGO, the model file "
"should ends with `.bmodel`, but now it's "
<< model_file << std::endl;
return false;
}
} else {
FDERROR
<< "Only support model format with frontend ModelFormat::PADDLE / "
"ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT."
<< std::endl;
return false;
}
return true;
}
ModelFormat GuessModelFormat(const std::string& model_file) {
if (model_file.size() > 8 &&
model_file.substr(model_file.size() - 8, 8) == ".pdmodel") {
FDINFO << "Model Format: PaddlePaddle." << std::endl;
return ModelFormat::PADDLE;
} else if (model_file.size() > 5 &&
model_file.substr(model_file.size() - 5, 5) == ".onnx") {
FDINFO << "Model Format: ONNX." << std::endl;
return ModelFormat::ONNX;
} else if (model_file.size() > 3 &&
model_file.substr(model_file.size() - 3, 3) == ".pt") {
FDINFO << "Model Format: Torchscript." << std::endl;
return ModelFormat::TORCHSCRIPT;
} else if (model_file.size() > 5 &&
model_file.substr(model_file.size() - 5, 5) == ".rknn") {
FDINFO << "Model Format: RKNN." << std::endl;
return ModelFormat::RKNN;
} else if (model_file.size() > 7 &&
model_file.substr(model_file.size() - 7, 7) == ".bmodel") {
FDINFO << "Model Format: SOPHGO." << std::endl;
return ModelFormat::SOPHGO;
}
FDERROR << "Cannot guess which model format you are using, please set "
"RuntimeOption::model_format manually."
<< std::endl;
return ModelFormat::PADDLE;
}
void RuntimeOption::SetModelPath(const std::string& model_path,
                                 const std::string& params_path,
                                 const ModelFormat& format) {


@@ -24,31 +24,17 @@
#include <map>
#include <vector>
#include "fastdeploy/runtime/enum_variables.h"
-#include "fastdeploy/backends/lite/option.h"
-#include "fastdeploy/backends/openvino/option.h"
-#include "fastdeploy/backends/ort/option.h"
-#include "fastdeploy/backends/paddle/option.h"
-#include "fastdeploy/backends/poros/option.h"
-#include "fastdeploy/backends/rknpu2/option.h"
-#include "fastdeploy/backends/sophgo/option.h"
-#include "fastdeploy/backends/tensorrt/option.h"
+#include "fastdeploy/runtime/backends/lite/option.h"
+#include "fastdeploy/runtime/backends/openvino/option.h"
+#include "fastdeploy/runtime/backends/ort/option.h"
+#include "fastdeploy/runtime/backends/paddle/option.h"
+#include "fastdeploy/runtime/backends/poros/option.h"
+#include "fastdeploy/runtime/backends/rknpu2/option.h"
+#include "fastdeploy/runtime/backends/sophgo/option.h"
+#include "fastdeploy/runtime/backends/tensorrt/option.h"
namespace fastdeploy {
/**
* @brief Get all the available inference backend in FastDeploy
*/
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
/**
* @brief Check if the inference backend available
*/
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
bool CheckModelFormat(const std::string& model_file,
const ModelFormat& model_format);
ModelFormat GuessModelFormat(const std::string& model_file);
/*! @brief Option object used when create a new Runtime object
 */
struct FASTDEPLOY_DECL RuntimeOption {
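For reference, a hedged sketch of how the RuntimeOption members touched above are typically used together (file names are hypothetical; SetModelPath and the backend field both appear in this diff):

    fastdeploy::RuntimeOption option;
    // Point the option at a Paddle format model (paths are placeholders).
    option.SetModelPath("model.pdmodel", "model.pdiparams",
                        fastdeploy::ModelFormat::PADDLE);
    // Pin the inference backend; FastDeployModel later validates this choice
    // against its valid_*_backends lists in InitRuntimeWithSpecifiedBackend().
    option.backend = fastdeploy::Backend::ORT;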