From d7a65e5c70e14ca1dc7831369fb0e2e39d869b46 Mon Sep 17 00:00:00 2001 From: Jason Date: Fri, 6 Jan 2023 13:44:05 +0800 Subject: [PATCH] [Other] Upgrade runtime module (#1068) * Upgrade runtime module * Update option.h * Fix build error * Move enumerates * little modification * little modification * little modification: * Remove some useless flags --- CMakeLists.txt | 17 +- FastDeploy.cmake.in | 1 - cmake/opencv.cmake | 6 - cmake/timvx.cmake | 5 - fastdeploy/backends/lite/option.h | 11 + fastdeploy/backends/openvino/option.h | 1 + fastdeploy/backends/ort/option.h | 1 + fastdeploy/backends/paddle/option.h | 1 + fastdeploy/backends/paddle/paddle_backend.cc | 2 + fastdeploy/backends/poros/option.h | 1 + .../rknpu2_config.h => rknpu2/option.h} | 0 .../{rknpu => }/rknpu2/rknpu2_backend.cc | 4 +- .../{rknpu => }/rknpu2/rknpu2_backend.h | 2 +- fastdeploy/backends/sophgo/option.h | 1 + fastdeploy/backends/tensorrt/option.h | 1 + fastdeploy/core/config.h.in | 4 - fastdeploy/core/fd_tensor.h | 2 +- fastdeploy/core/fd_type.cc | 155 +-- fastdeploy/core/fd_type.h | 19 - fastdeploy/fastdeploy_model.h | 4 +- fastdeploy/pybind/main.h | 2 +- fastdeploy/pybind/rknpu2_config_pybind.cc | 24 +- fastdeploy/runtime.cc | 1012 ----------------- fastdeploy/runtime.h | 572 +--------- fastdeploy/runtime/enum_variables.cc | 85 ++ fastdeploy/runtime/enum_variables.h | 79 ++ fastdeploy/runtime/runtime.cc | 492 ++++++++ fastdeploy/runtime/runtime.h | 109 ++ fastdeploy/runtime/runtime_option.cc | 515 +++++++++ fastdeploy/runtime/runtime_option.h | 482 ++++++++ .../detection/contrib/yolov7end2end_trt.cc | 6 +- 31 files changed, 1838 insertions(+), 1778 deletions(-) rename fastdeploy/backends/{rknpu/rknpu2/rknpu2_config.h => rknpu2/option.h} (100%) rename fastdeploy/backends/{rknpu => }/rknpu2/rknpu2_backend.cc (99%) rename fastdeploy/backends/{rknpu => }/rknpu2/rknpu2_backend.h (98%) mode change 100755 => 100644 fastdeploy/core/fd_type.cc delete mode 100755 fastdeploy/runtime.cc create mode 100644 fastdeploy/runtime/enum_variables.cc create mode 100644 fastdeploy/runtime/enum_variables.h create mode 100644 fastdeploy/runtime/runtime.cc create mode 100755 fastdeploy/runtime/runtime.h create mode 100644 fastdeploy/runtime/runtime_option.cc create mode 100644 fastdeploy/runtime/runtime_option.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 42bc600bb..eb5a18fdc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,15 +71,12 @@ option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF) option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF) option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF) option(WITH_TESTING "Whether to compile with unittest." OFF) + ############################# Options for Android cross compiling ######################### option(WITH_OPENCV_STATIC "Use OpenCV static lib for Android." OFF) option(WITH_LITE_STATIC "Use Paddle Lite static lib for Android." OFF) option(WITH_OPENMP "Use OpenMP support for Android." OFF) -# Please don't open this flag now, some bugs exists. -# Only support Linux Now -# option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF) - # Whether to build fastdeploy with vision/text/... examples, only for testings. 
option(BUILD_EXAMPLES "Whether to build fastdeploy with vision examples" OFF) @@ -187,7 +184,6 @@ add_definitions(-DFASTDEPLOY_LIB) configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h) configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc) file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc) -file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc) file(GLOB_RECURSE FDTENSOR_FUNC_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cu) file(GLOB_RECURSE DEPLOY_OP_CUDA_KERNEL_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/op_cuda_kernels/*.cu) file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc) @@ -195,7 +191,7 @@ file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fas file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc) file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp) file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc) -file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu/rknpu2/*.cc) +file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu2/*.cc) file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/sophgo/*.cc) file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc) file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc) @@ -420,15 +416,6 @@ endif() if(ENABLE_VISION) add_definitions(-DENABLE_VISION) add_definitions(-DENABLE_VISION_VISUALIZE) - if(ENABLE_OPENCV_CUDA) - if(NOT WITH_GPU) - message(FATAL_ERROR "ENABLE_OPENCV_CUDA is available on Linux and WITH_GPU=ON, but now WITH_GPU=OFF.") - endif() - if(APPLE OR ANDROID OR IOS OR WIN32) - message(FATAL_ERROR "Cannot enable opencv with cuda in mac/ios/android/windows os, please set -DENABLE_OPENCV_CUDA=OFF.") - endif() - add_definitions(-DENABLE_OPENCV_CUDA) - endif() add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp) list(APPEND DEPEND_LIBS yaml-cpp) if(BUILD_CUDA_SRC) diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index 6ba0b4307..a9f52d2e3 100755 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -20,7 +20,6 @@ set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@) set(OPENVINO_VERSION @OPENVINO_VERSION@) set(WITH_LITE_STATIC @WITH_LITE_STATIC@) set(WITH_OPENCV_STATIC @WITH_OPENCV_STATIC@) -# set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@) set(OPENCV_FILENAME @OPENCV_FILENAME@) set(OPENVINO_FILENAME @OPENVINO_FILENAME@) set(PADDLELITE_FILENAME @PADDLELITE_FILENAME@) diff --git a/cmake/opencv.cmake b/cmake/opencv.cmake index fd2ecabe4..5103a69d9 100755 --- a/cmake/opencv.cmake +++ b/cmake/opencv.cmake @@ -42,12 +42,6 @@ else() if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") set(OPENCV_FILENAME "opencv-linux-aarch64-3.4.14") endif() - if(ENABLE_OPENCV_CUDA) - if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") - message(FATAL_ERROR "Cannot set 
ENABLE_OPENCV_CUDA=ON while in linux-aarch64 platform.") - endif() - set(OPENCV_FILENAME "opencv-linux-x64-gpu-3.4.16") - endif() endif() if(NOT OPENCV_FILENAME) diff --git a/cmake/timvx.cmake b/cmake/timvx.cmake index 973face96..aae1fba1a 100755 --- a/cmake/timvx.cmake +++ b/cmake/timvx.cmake @@ -29,11 +29,6 @@ if(${WITH_GPU}) set(WITH_GPU OFF) endif() -if(${ENABLE_OPENCV_CUDA}) - message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_OPENCV_CUDA=OFF") - set(ENABLE_OPENCV_CUDA OFF) -endif() - if(${ENABLE_TEXT}) set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE) message(STATUS "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_TEXT=OFF") diff --git a/fastdeploy/backends/lite/option.h b/fastdeploy/backends/lite/option.h index 2a4ba7a33..072f23771 100755 --- a/fastdeploy/backends/lite/option.h +++ b/fastdeploy/backends/lite/option.h @@ -14,6 +14,7 @@ #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include @@ -21,6 +22,16 @@ #include namespace fastdeploy { +/*! Paddle Lite power mode for mobile device. */ +enum LitePowerMode { + LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode + LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode + LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode + LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode + LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode + LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode +}; + struct LiteBackendOption { // cpu num threads int threads = 1; diff --git a/fastdeploy/backends/openvino/option.h b/fastdeploy/backends/openvino/option.h index fa18d5ef9..e78a73496 100644 --- a/fastdeploy/backends/openvino/option.h +++ b/fastdeploy/backends/openvino/option.h @@ -14,6 +14,7 @@ #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/backends/ort/option.h b/fastdeploy/backends/ort/option.h index db58dbdd7..78f117b99 100644 --- a/fastdeploy/backends/ort/option.h +++ b/fastdeploy/backends/ort/option.h @@ -14,6 +14,7 @@ #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/backends/paddle/option.h b/fastdeploy/backends/paddle/option.h index 24fda8277..3f2d03ca0 100644 --- a/fastdeploy/backends/paddle/option.h +++ b/fastdeploy/backends/paddle/option.h @@ -14,6 +14,7 @@ #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/backends/paddle/paddle_backend.cc b/fastdeploy/backends/paddle/paddle_backend.cc index de2ac6223..f5340ed86 100644 --- a/fastdeploy/backends/paddle/paddle_backend.cc +++ b/fastdeploy/backends/paddle/paddle_backend.cc @@ -31,6 +31,8 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) { config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_); auto precision = paddle_infer::PrecisionType::kFloat32; if (option.trt_option.enable_fp16) { + FDINFO << "Will try to use tensorrt fp16 inference with Paddle Backend." 
+ << std::endl; precision = paddle_infer::PrecisionType::kHalf; } bool use_static = false; diff --git a/fastdeploy/backends/poros/option.h b/fastdeploy/backends/poros/option.h index 4d9a11a07..2b715f7dc 100755 --- a/fastdeploy/backends/poros/option.h +++ b/fastdeploy/backends/poros/option.h @@ -14,6 +14,7 @@ #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h b/fastdeploy/backends/rknpu2/option.h similarity index 100% rename from fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h rename to fastdeploy/backends/rknpu2/option.h diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc b/fastdeploy/backends/rknpu2/rknpu2_backend.cc similarity index 99% rename from fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc rename to fastdeploy/backends/rknpu2/rknpu2_backend.cc index 94a6d42d3..bcb892fb6 100644 --- a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc +++ b/fastdeploy/backends/rknpu2/rknpu2_backend.cc @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h" +#include "fastdeploy/backends/rknpu2/rknpu2_backend.h" #include "fastdeploy/utils/perf.h" namespace fastdeploy { RKNPU2Backend::~RKNPU2Backend() { @@ -478,4 +478,4 @@ RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) { FDERROR << "rknn_tensor_type don't support this type" << std::endl; return RKNN_TENSOR_TYPE_MAX; } -} // namespace fastdeploy \ No newline at end of file +} // namespace fastdeploy diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h b/fastdeploy/backends/rknpu2/rknpu2_backend.h similarity index 98% rename from fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h rename to fastdeploy/backends/rknpu2/rknpu2_backend.h index 33704679c..5482c4758 100644 --- a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h +++ b/fastdeploy/backends/rknpu2/rknpu2_backend.h @@ -14,7 +14,7 @@ #pragma once #include "fastdeploy/backends/backend.h" -#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h" +#include "fastdeploy/backends/rknpu2/option.h" #include "fastdeploy/core/fd_tensor.h" #include "rknn_api.h" // NOLINT #include diff --git a/fastdeploy/backends/sophgo/option.h b/fastdeploy/backends/sophgo/option.h index 320cb7ae2..f4339c32f 100644 --- a/fastdeploy/backends/sophgo/option.h +++ b/fastdeploy/backends/sophgo/option.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/backends/tensorrt/option.h b/fastdeploy/backends/tensorrt/option.h index 3f7c2a208..94ec010d0 100755 --- a/fastdeploy/backends/tensorrt/option.h +++ b/fastdeploy/backends/tensorrt/option.h @@ -13,6 +13,7 @@ // limitations under the License. 
#pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/core/config.h.in b/fastdeploy/core/config.h.in index 21c398af0..c2541cc46 100755 --- a/fastdeploy/core/config.h.in +++ b/fastdeploy/core/config.h.in @@ -57,10 +57,6 @@ #cmakedefine ENABLE_TEXT #endif -#ifndef ENABLE_OPENCV_CUDA -#cmakedefine ENABLE_OPENCV_CUDA -#endif - #ifdef ENABLE_VISION #ifndef ENABLE_VISION_VISUALIZE #define ENABLE_VISION_VISUALIZE diff --git a/fastdeploy/core/fd_tensor.h b/fastdeploy/core/fd_tensor.h index c6e1ed5cb..5584f1b30 100644 --- a/fastdeploy/core/fd_tensor.h +++ b/fastdeploy/core/fd_tensor.h @@ -21,11 +21,11 @@ #include "fastdeploy/core/allocate.h" #include "fastdeploy/core/fd_scalar.h" #include "fastdeploy/core/fd_type.h" +#include "fastdeploy/runtime/enum_variables.h" namespace fastdeploy { struct FASTDEPLOY_DECL FDTensor { - // std::vector data; void* buffer_ = nullptr; std::vector shape = {0}; diff --git a/fastdeploy/core/fd_type.cc b/fastdeploy/core/fd_type.cc old mode 100755 new mode 100644 index 420e03ff7..17bc2cdb6 --- a/fastdeploy/core/fd_type.cc +++ b/fastdeploy/core/fd_type.cc @@ -44,70 +44,6 @@ int FDDataTypeSize(const FDDataType& data_type) { return -1; } -std::string Str(const Device& d) { - std::string out; - switch (d) { - case Device::CPU: - out = "Device::CPU"; - break; - case Device::GPU: - out = "Device::GPU"; - break; - case Device::RKNPU: - out = "Device::RKNPU"; - break; - case Device::SOPHGOTPUD: - out = "Device::SOPHGOTPUD"; - break; - case Device::IPU: - out = "Device::IPU"; - break; - case Device::TIMVX: - out = "Device::TIMVX"; - break; - case Device::ASCEND: - out = "Device::ASCEND"; - break; - case Device::KUNLUNXIN: - out = "Device::KUNLUNXIN"; - break; - default: - out = "Device::UNKOWN"; - } - return out; -} - -std::ostream& operator<<(std::ostream& out,const Device& d){ - switch (d) { - case Device::CPU: - out << "Device::CPU"; - break; - case Device::GPU: - out << "Device::GPU"; - break; - case Device::RKNPU: - out << "Device::RKNPU"; - break; - case Device::SOPHGOTPUD: - out << "Device::SOPHGOTPUD"; - break; - case Device::TIMVX: - out << "Device::TIMVX"; - break; - case Device::KUNLUNXIN: - out << "Device::KUNLUNXIN"; - break; - case Device::ASCEND: - out << "Device::ASCEND"; - break; - default: - out << "Device::UNKOWN"; - } - return out; -} - - - std::string Str(const FDDataType& fdt) { std::string out; switch (fdt) { @@ -144,37 +80,37 @@ std::string Str(const FDDataType& fdt) { return out; } -std::ostream& operator<<(std::ostream& out,const FDDataType& fdt){ +std::ostream& operator<<(std::ostream& out, const FDDataType& fdt) { switch (fdt) { - case FDDataType::BOOL: - out << "FDDataType::BOOL"; - break; - case FDDataType::INT16: - out << "FDDataType::INT16"; - break; - case FDDataType::INT32: - out << "FDDataType::INT32"; - break; - case FDDataType::INT64: - out << "FDDataType::INT64"; - break; - case FDDataType::FP32: - out << "FDDataType::FP32"; - break; - case FDDataType::FP64: - out << "FDDataType::FP64"; - break; - case FDDataType::FP16: - out << "FDDataType::FP16"; - break; - case FDDataType::UINT8: - out << "FDDataType::UINT8"; - break; - case FDDataType::INT8: - out << "FDDataType::INT8"; - break; - default: - out << "FDDataType::UNKNOWN"; + case FDDataType::BOOL: + out << "FDDataType::BOOL"; + break; + case FDDataType::INT16: + out << "FDDataType::INT16"; + break; + case FDDataType::INT32: + out << "FDDataType::INT32"; + break; + case FDDataType::INT64: + out << "FDDataType::INT64"; + break; + case 
FDDataType::FP32: + out << "FDDataType::FP32"; + break; + case FDDataType::FP64: + out << "FDDataType::FP64"; + break; + case FDDataType::FP16: + out << "FDDataType::FP16"; + break; + case FDDataType::UINT8: + out << "FDDataType::UINT8"; + break; + case FDDataType::INT8: + out << "FDDataType::INT8"; + break; + default: + out << "FDDataType::UNKNOWN"; } return out; } @@ -206,35 +142,4 @@ const FDDataType TypeToDataType::dtype = UINT8; template <> const FDDataType TypeToDataType::dtype = INT8; -std::string Str(const ModelFormat& f) { - if (f == ModelFormat::PADDLE) { - return "ModelFormat::PADDLE"; - } else if (f == ModelFormat::ONNX) { - return "ModelFormat::ONNX"; - } else if (f == ModelFormat::RKNN) { - return "ModelFormat::RKNN"; - } else if (f == ModelFormat::SOPHGO) { - return "ModelFormat::SOPHGO"; - } else if (f == ModelFormat::TORCHSCRIPT) { - return "ModelFormat::TORCHSCRIPT"; - } - return "UNKNOWN-ModelFormat"; -} - -std::ostream& operator<<(std::ostream& out, const ModelFormat& format) { - if (format == ModelFormat::PADDLE) { - out << "ModelFormat::PADDLE"; - } else if (format == ModelFormat::ONNX) { - out << "ModelFormat::ONNX"; - } else if (format == ModelFormat::RKNN) { - out << "ModelFormat::RKNN"; - } else if (format == ModelFormat::SOPHGO) { - out << "ModelFormat::SOPHGO"; - } else if (format == ModelFormat::TORCHSCRIPT) { - out << "ModelFormat::TORCHSCRIPT"; - } - out << "UNKNOWN-ModelFormat"; - return out; -} - } // namespace fastdeploy diff --git a/fastdeploy/core/fd_type.h b/fastdeploy/core/fd_type.h index 5b49f1e86..2782ecf0b 100755 --- a/fastdeploy/core/fd_type.h +++ b/fastdeploy/core/fd_type.h @@ -22,11 +22,6 @@ namespace fastdeploy { -enum FASTDEPLOY_DECL Device {CPU, GPU, RKNPU, IPU, TIMVX, KUNLUNXIN, ASCEND, - SOPHGOTPUD}; - -FASTDEPLOY_DECL std::string Str(const Device& d); - enum FASTDEPLOY_DECL FDDataType { BOOL, INT16, @@ -52,7 +47,6 @@ enum FASTDEPLOY_DECL FDDataType { INT8 }; -FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d); FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const FDDataType& fdt); @@ -66,17 +60,4 @@ struct FASTDEPLOY_DECL TypeToDataType { static const FDDataType dtype; }; -/*! 
Deep learning model format */ -enum ModelFormat { - AUTOREC, ///< Auto recognize the model format by model file name - PADDLE, ///< Model with paddlepaddle format - ONNX, ///< Model with ONNX format - RKNN, ///< Model with RKNN format - TORCHSCRIPT, ///< Model with TorchScript format - SOPHGO, ///< Model with SOPHGO format -}; - -FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, - const ModelFormat& format); - } // namespace fastdeploy diff --git a/fastdeploy/fastdeploy_model.h b/fastdeploy/fastdeploy_model.h index 9b78c3d3f..698827cc2 100755 --- a/fastdeploy/fastdeploy_model.h +++ b/fastdeploy/fastdeploy_model.h @@ -121,9 +121,7 @@ class FASTDEPLOY_DECL FastDeployModel { std::vector().swap(reused_output_tensors_); } - virtual fastdeploy::Runtime* CloneRuntime() { - return runtime_->Clone(); - } + virtual fastdeploy::Runtime* CloneRuntime() { return runtime_->Clone(); } virtual bool SetRuntime(fastdeploy::Runtime* clone_runtime) { runtime_ = std::unique_ptr(clone_runtime); diff --git a/fastdeploy/pybind/main.h b/fastdeploy/pybind/main.h index de817bb73..b80aeaca4 100755 --- a/fastdeploy/pybind/main.h +++ b/fastdeploy/pybind/main.h @@ -21,7 +21,7 @@ #include -#include "fastdeploy/runtime.h" +#include "fastdeploy/runtime/runtime.h" #ifdef ENABLE_VISION #include "fastdeploy/vision.h" diff --git a/fastdeploy/pybind/rknpu2_config_pybind.cc b/fastdeploy/pybind/rknpu2_config_pybind.cc index 4880b2db6..c7ce47553 100644 --- a/fastdeploy/pybind/rknpu2_config_pybind.cc +++ b/fastdeploy/pybind/rknpu2_config_pybind.cc @@ -11,23 +11,27 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h" +#include "fastdeploy/backends/rknpu2/option.h" #include "fastdeploy/pybind/main.h" namespace fastdeploy { void BindRKNPU2Config(pybind11::module& m) { - pybind11::enum_(m, "CpuName", pybind11::arithmetic(), - "CpuName for inference.") + pybind11::enum_( + m, "CpuName", pybind11::arithmetic(), "CpuName for inference.") .value("RK356X", fastdeploy::rknpu2::CpuName::RK356X) .value("RK3588", fastdeploy::rknpu2::CpuName::RK3588) .value("UNDEFINED", fastdeploy::rknpu2::CpuName::UNDEFINED); - pybind11::enum_(m, "CoreMask", pybind11::arithmetic(), - "CoreMask for inference.") - .value("RKNN_NPU_CORE_AUTO", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO) + pybind11::enum_( + m, "CoreMask", pybind11::arithmetic(), "CoreMask for inference.") + .value("RKNN_NPU_CORE_AUTO", + fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO) .value("RKNN_NPU_CORE_0", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0) .value("RKNN_NPU_CORE_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_1) .value("RKNN_NPU_CORE_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_2) - .value("RKNN_NPU_CORE_0_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1) - .value("RKNN_NPU_CORE_0_1_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2) - .value("RKNN_NPU_CORE_UNDEFINED", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED); + .value("RKNN_NPU_CORE_0_1", + fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1) + .value("RKNN_NPU_CORE_0_1_2", + fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2) + .value("RKNN_NPU_CORE_UNDEFINED", + fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED); } -} // namespace fastdeploy \ No newline at end of file +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc deleted file mode 
100755 index 6c1949ed3..000000000 --- a/fastdeploy/runtime.cc +++ /dev/null @@ -1,1012 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/runtime.h" - -#include "fastdeploy/utils/unique_ptr.h" -#include "fastdeploy/utils/utils.h" - -#ifdef ENABLE_ORT_BACKEND -#include "fastdeploy/backends/ort/ort_backend.h" -#endif - -#ifdef ENABLE_TRT_BACKEND -#include "fastdeploy/backends/tensorrt/trt_backend.h" -#endif - -#ifdef ENABLE_PADDLE_BACKEND -#include "fastdeploy/backends/paddle/paddle_backend.h" -#endif - -#ifdef ENABLE_POROS_BACKEND -#include "fastdeploy/backends/poros/poros_backend.h" -#endif - -#ifdef ENABLE_OPENVINO_BACKEND -#include "fastdeploy/backends/openvino/ov_backend.h" -#endif - -#ifdef ENABLE_LITE_BACKEND -#include "fastdeploy/backends/lite/lite_backend.h" -#endif - -#ifdef ENABLE_RKNPU2_BACKEND -#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h" -#endif - -#ifdef ENABLE_SOPHGO_BACKEND -#include "fastdeploy/backends/sophgo/sophgo_backend.h" -#endif - -namespace fastdeploy { - -std::vector GetAvailableBackends() { - std::vector backends; -#ifdef ENABLE_ORT_BACKEND - backends.push_back(Backend::ORT); -#endif -#ifdef ENABLE_TRT_BACKEND - backends.push_back(Backend::TRT); -#endif -#ifdef ENABLE_PADDLE_BACKEND - backends.push_back(Backend::PDINFER); -#endif -#ifdef ENABLE_POROS_BACKEND - backends.push_back(Backend::POROS); -#endif -#ifdef ENABLE_OPENVINO_BACKEND - backends.push_back(Backend::OPENVINO); -#endif -#ifdef ENABLE_LITE_BACKEND - backends.push_back(Backend::LITE); -#endif -#ifdef ENABLE_RKNPU2_BACKEND - backends.push_back(Backend::RKNPU2); -#endif -#ifdef ENABLE_SOPHGO_BACKEND - backends.push_back(Backend::SOPHGOTPU); -#endif - return backends; -} - -bool IsBackendAvailable(const Backend& backend) { - std::vector backends = GetAvailableBackends(); - for (size_t i = 0; i < backends.size(); ++i) { - if (backend == backends[i]) { - return true; - } - } - return false; -} - -std::string Str(const Backend& b) { - if (b == Backend::ORT) { - return "Backend::ORT"; - } else if (b == Backend::TRT) { - return "Backend::TRT"; - } else if (b == Backend::PDINFER) { - return "Backend::PDINFER"; - } else if (b == Backend::POROS) { - return "Backend::POROS"; - } else if (b == Backend::RKNPU2) { - return "Backend::RKNPU2"; - } else if (b == Backend::SOPHGOTPU) { - return "Backend::SOPHGOTPU"; - } else if (b == Backend::OPENVINO) { - return "Backend::OPENVINO"; - } else if (b == Backend::LITE) { - return "Backend::PDLITE"; - } - return "UNKNOWN-Backend"; -} - -std::ostream& operator<<(std::ostream& out, const Backend& backend) { - if (backend == Backend::ORT) { - out << "Backend::ORT"; - } else if (backend == Backend::TRT) { - out << "Backend::TRT"; - } else if (backend == Backend::PDINFER) { - out << "Backend::PDINFER"; - } else if (backend == Backend::OPENVINO) { - out << "Backend::OPENVINO"; - } else if (backend == Backend::RKNPU2) { - out << "Backend::RKNPU2"; - } else if 
(backend == Backend::SOPHGOTPU) { - out << "Backend::SOPHGOTPU"; - } else if (backend == Backend::POROS) { - out << "Backend::POROS"; - } else if (backend == Backend::LITE) { - out << "Backend::PDLITE"; - } else { - out << "UNKNOWN-Backend"; - } - return out; -} - -bool CheckModelFormat(const std::string& model_file, - const ModelFormat& model_format) { - if (model_format == ModelFormat::PADDLE) { - if (model_file.size() < 8 || - model_file.substr(model_file.size() - 8, 8) != ".pdmodel") { - FDERROR << "With model format of ModelFormat::PADDLE, the model file " - "should ends with `.pdmodel`, but now it's " - << model_file << std::endl; - return false; - } - } else if (model_format == ModelFormat::ONNX) { - if (model_file.size() < 5 || - model_file.substr(model_file.size() - 5, 5) != ".onnx") { - FDERROR << "With model format of ModelFormat::ONNX, the model file " - "should ends with `.onnx`, but now it's " - << model_file << std::endl; - return false; - } - } else if (model_format == ModelFormat::RKNN) { - if (model_file.size() < 5 || - model_file.substr(model_file.size() - 5, 5) != ".rknn") { - FDERROR << "With model format of ModelFormat::RKNN, the model file " - "should ends with `.rknn`, but now it's " - << model_file << std::endl; - return false; - } - } else if (model_format == ModelFormat::TORCHSCRIPT) { - if (model_file.size() < 3 || - model_file.substr(model_file.size() - 3, 3) != ".pt") { - FDERROR - << "With model format of ModelFormat::TORCHSCRIPT, the model file " - "should ends with `.pt`, but now it's " - << model_file << std::endl; - return false; - } - } else if (model_format == ModelFormat::SOPHGO) { - if (model_file.size() < 7 || - model_file.substr(model_file.size() -7, 7) != ".bmodel") { - FDERROR - << "With model format of ModelFormat::SOPHGO, the model file " - "should ends with `.bmodel`, but now it's " - << model_file << std::endl; - return false; - } - } else { - FDERROR - << "Only support model format with frontend ModelFormat::PADDLE / " - "ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT." - << std::endl; - return false; - } - return true; -} - -ModelFormat GuessModelFormat(const std::string& model_file) { - if (model_file.size() > 8 && - model_file.substr(model_file.size() - 8, 8) == ".pdmodel") { - FDINFO << "Model Format: PaddlePaddle." << std::endl; - return ModelFormat::PADDLE; - } else if (model_file.size() > 5 && - model_file.substr(model_file.size() - 5, 5) == ".onnx") { - FDINFO << "Model Format: ONNX." << std::endl; - return ModelFormat::ONNX; - } else if (model_file.size() > 3 && - model_file.substr(model_file.size() - 3, 3) == ".pt") { - FDINFO << "Model Format: Torchscript." << std::endl; - return ModelFormat::TORCHSCRIPT; - } else if (model_file.size() > 5 && - model_file.substr(model_file.size() - 5, 5) == ".rknn") { - FDINFO << "Model Format: RKNN." << std::endl; - return ModelFormat::RKNN; - } else if (model_file.size() > 7 && - model_file.substr(model_file.size() - 7, 7) == ".bmodel") { - FDINFO << "Model Format: SOPHGO." << std::endl; - return ModelFormat::SOPHGO; - } - - FDERROR << "Cannot guess which model format you are using, please set " - "RuntimeOption::model_format manually." 
- << std::endl; - return ModelFormat::PADDLE; -} - -void RuntimeOption::SetModelPath(const std::string& model_path, - const std::string& params_path, - const ModelFormat& format) { - if (format == ModelFormat::PADDLE) { - model_file = model_path; - params_file = params_path; - model_format = ModelFormat::PADDLE; - } else if (format == ModelFormat::ONNX) { - model_file = model_path; - model_format = ModelFormat::ONNX; - } else if (format == ModelFormat::TORCHSCRIPT) { - model_file = model_path; - model_format = ModelFormat::TORCHSCRIPT; - } else { - FDASSERT(false, - "The model format only can be " - "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT."); - } -} - -void RuntimeOption::SetModelBuffer(const char * model_buffer, - size_t model_buffer_size, - const char * params_buffer, - size_t params_buffer_size, - const ModelFormat& format) { - model_buffer_size_ = model_buffer_size; - params_buffer_size_ = params_buffer_size; - model_from_memory_ = true; - if (format == ModelFormat::PADDLE) { - model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size); - params_buffer_ = std::string(params_buffer, params_buffer + params_buffer_size); - model_format = ModelFormat::PADDLE; - } else if (format == ModelFormat::ONNX) { - model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size); - model_format = ModelFormat::ONNX; - } else if (format == ModelFormat::TORCHSCRIPT) { - model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size); - model_format = ModelFormat::TORCHSCRIPT; - } else { - FDASSERT(false, - "The model format only can be " - "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT."); - } -} - -void RuntimeOption::UseGpu(int gpu_id) { -#ifdef WITH_GPU - device = Device::GPU; - device_id = gpu_id; -#else - FDWARNING << "The FastDeploy didn't compile with GPU, will force to use CPU." 
- << std::endl; - device = Device::CPU; -#endif -} - -void RuntimeOption::UseCpu() { device = Device::CPU; } - -void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name, - fastdeploy::rknpu2::CoreMask rknpu2_core) { - rknpu2_cpu_name_ = rknpu2_name; - rknpu2_core_mask_ = rknpu2_core; - device = Device::RKNPU; -} - -void RuntimeOption::UseTimVX() { - enable_timvx = true; - device = Device::TIMVX; -} - -void RuntimeOption::UseKunlunXin(int kunlunxin_id, - int l3_workspace_size, - bool locked, - bool autotune, - const std::string &autotune_file, - const std::string &precision, - bool adaptive_seqlen, - bool enable_multi_stream) { - enable_kunlunxin = true; - device_id = kunlunxin_id; - kunlunxin_l3_workspace_size = l3_workspace_size; - kunlunxin_locked=locked; - kunlunxin_autotune=autotune; - kunlunxin_autotune_file=autotune_file; - kunlunxin_precision = precision; - kunlunxin_adaptive_seqlen=adaptive_seqlen; - kunlunxin_enable_multi_stream=enable_multi_stream; - device = Device::KUNLUNXIN; -} - -void RuntimeOption::UseAscend(){ - enable_ascend = true; - device = Device::ASCEND; -} - -void RuntimeOption::UseSophgo() { - device = Device::SOPHGOTPUD; - UseSophgoBackend(); -} - -void RuntimeOption::SetExternalStream(void* external_stream) { - external_stream_ = external_stream; -} - -void RuntimeOption::SetCpuThreadNum(int thread_num) { - FDASSERT(thread_num > 0, "The thread_num must be greater than 0."); - cpu_thread_num = thread_num; -} - -void RuntimeOption::SetOrtGraphOptLevel(int level) { - std::vector supported_level{-1, 0, 1, 2}; - auto valid_level = std::find(supported_level.begin(), supported_level.end(), - level) != supported_level.end(); - FDASSERT(valid_level, "The level must be -1, 0, 1, 2."); - ort_graph_opt_level = level; -} - -// use paddle inference backend -void RuntimeOption::UsePaddleBackend() { -#ifdef ENABLE_PADDLE_BACKEND - backend = Backend::PDINFER; -#else - FDASSERT(false, "The FastDeploy didn't compile with Paddle Inference."); -#endif -} - -// use onnxruntime backend -void RuntimeOption::UseOrtBackend() { -#ifdef ENABLE_ORT_BACKEND - backend = Backend::ORT; -#else - FDASSERT(false, "The FastDeploy didn't compile with OrtBackend."); -#endif -} - -// use sophgoruntime backend -void RuntimeOption::UseSophgoBackend() { -#ifdef ENABLE_SOPHGO_BACKEND - backend = Backend::SOPHGOTPU; -#else - FDASSERT(false, "The FastDeploy didn't compile with SophgoBackend."); -#endif -} - -// use poros backend -void RuntimeOption::UsePorosBackend() { -#ifdef ENABLE_POROS_BACKEND - backend = Backend::POROS; -#else - FDASSERT(false, "The FastDeploy didn't compile with PorosBackend."); -#endif -} - -void RuntimeOption::UseTrtBackend() { -#ifdef ENABLE_TRT_BACKEND - backend = Backend::TRT; -#else - FDASSERT(false, "The FastDeploy didn't compile with TrtBackend."); -#endif -} - -void RuntimeOption::UseOpenVINOBackend() { -#ifdef ENABLE_OPENVINO_BACKEND - backend = Backend::OPENVINO; -#else - FDASSERT(false, "The FastDeploy didn't compile with OpenVINO."); -#endif -} - -void RuntimeOption::UseLiteBackend() { -#ifdef ENABLE_LITE_BACKEND - backend = Backend::LITE; -#else - FDASSERT(false, "The FastDeploy didn't compile with Paddle Lite."); -#endif -} - -void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) { - pd_enable_mkldnn = pd_mkldnn; -} - -void RuntimeOption::DeletePaddleBackendPass(const std::string& pass_name) { - pd_delete_pass_names.push_back(pass_name); -} -void RuntimeOption::EnablePaddleLogInfo() { pd_enable_log_info = true; } - -void RuntimeOption::DisablePaddleLogInfo() { 
pd_enable_log_info = false; } - -void RuntimeOption::EnablePaddleToTrt() { - FDASSERT(backend == Backend::TRT, - "Should call UseTrtBackend() before call EnablePaddleToTrt()."); -#ifdef ENABLE_PADDLE_BACKEND - FDINFO << "While using TrtBackend with EnablePaddleToTrt, FastDeploy will " - "change to use Paddle Inference Backend." - << std::endl; - backend = Backend::PDINFER; - pd_enable_trt = true; -#else - FDASSERT(false, "While using TrtBackend with EnablePaddleToTrt, require the " - "FastDeploy is compiled with Paddle Inference Backend, " - "please rebuild your FastDeploy."); -#endif -} - -void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) { - FDASSERT(size > 0, "Parameter size must greater than 0."); - pd_mkldnn_cache_size = size; -} - -void RuntimeOption::SetOpenVINODevice(const std::string& name) { - openvino_device = name; -} - -void RuntimeOption::EnableLiteFP16() { lite_enable_fp16 = true; } - -void RuntimeOption::DisableLiteFP16() { lite_enable_fp16 = false; } -void RuntimeOption::EnableLiteInt8() { lite_enable_int8 = true; } - -void RuntimeOption::DisableLiteInt8() { lite_enable_int8 = false; } -void RuntimeOption::SetLitePowerMode(LitePowerMode mode) { - lite_power_mode = mode; -} - -void RuntimeOption::SetLiteOptimizedModelDir( - const std::string& optimized_model_dir) { - lite_optimized_model_dir = optimized_model_dir; -} - -void RuntimeOption::SetLiteSubgraphPartitionPath( - const std::string& nnadapter_subgraph_partition_config_path) { - lite_nnadapter_subgraph_partition_config_path = - nnadapter_subgraph_partition_config_path; -} - -void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer( - const std::string& nnadapter_subgraph_partition_config_buffer){ - lite_nnadapter_subgraph_partition_config_buffer = nnadapter_subgraph_partition_config_buffer; -} - -void RuntimeOption::SetLiteDeviceNames(const std::vector& nnadapter_device_names){ - lite_nnadapter_device_names = nnadapter_device_names; -} - -void RuntimeOption::SetLiteContextProperties(const std::string& nnadapter_context_properties){ - lite_nnadapter_context_properties = nnadapter_context_properties; -} - -void RuntimeOption::SetLiteModelCacheDir(const std::string& nnadapter_model_cache_dir){ - lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir; -} - - -void RuntimeOption::SetLiteDynamicShapeInfo( - const std::map>>& - nnadapter_dynamic_shape_info){ - lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info; -} - -void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath( - const std::string& nnadapter_mixed_precision_quantization_config_path){ - lite_nnadapter_mixed_precision_quantization_config_path = nnadapter_mixed_precision_quantization_config_path; -} - - -void RuntimeOption::SetTrtInputShape(const std::string& input_name, - const std::vector& min_shape, - const std::vector& opt_shape, - const std::vector& max_shape) { - trt_min_shape[input_name].clear(); - trt_max_shape[input_name].clear(); - trt_opt_shape[input_name].clear(); - trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end()); - if (opt_shape.size() == 0) { - trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end()); - } else { - trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end()); - } - if (max_shape.size() == 0) { - trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end()); - } else { - trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end()); - } -} - -void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) { - trt_max_workspace_size = 
max_workspace_size; -} -void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size) { - trt_max_batch_size = max_batch_size; -} - -void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; } - -void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; } - -void RuntimeOption::EnablePinnedMemory() { enable_pinned_memory = true; } - -void RuntimeOption::DisablePinnedMemory() { enable_pinned_memory = false; } - -void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) { - trt_serialize_file = cache_file_path; -} - -void RuntimeOption::SetOpenVINOStreams(int num_streams) { - ov_num_streams = num_streams; -} - -bool Runtime::Compile(std::vector>& prewarm_tensors, - const RuntimeOption& _option) { -#ifdef ENABLE_POROS_BACKEND - option = _option; - auto poros_option = PorosBackendOption(); - poros_option.use_gpu = (option.device == Device::GPU) ? true : false; - poros_option.gpu_id = option.device_id; - poros_option.long_to_int = option.long_to_int; - poros_option.use_nvidia_tf32 = option.use_nvidia_tf32; - poros_option.unconst_ops_thres = option.unconst_ops_thres; - poros_option.poros_file = option.poros_file; - poros_option.is_dynamic = option.is_dynamic; - poros_option.enable_fp16 = option.trt_enable_fp16; - poros_option.max_batch_size = option.trt_max_batch_size; - poros_option.max_workspace_size = option.trt_max_workspace_size; - FDASSERT( - option.model_format == ModelFormat::TORCHSCRIPT, - "PorosBackend only support model format of ModelFormat::TORCHSCRIPT."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - FDASSERT( - casted_backend->Compile(option.model_file, prewarm_tensors, poros_option), - "Load model from Torchscript failed while initliazing PorosBackend."); -#else - FDASSERT(false, "PorosBackend is not available, please compiled with " - "ENABLE_POROS_BACKEND=ON."); -#endif - return true; -} - -void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; } - -void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; } - -void RuntimeOption::DisablePaddleTrtOPs(const std::vector& ops) { - trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end()); -} - -void RuntimeOption::UseIpu(int device_num, int micro_batch_size, - bool enable_pipelining, int batches_per_step) { -#ifdef WITH_IPU - device = Device::IPU; - ipu_device_num = device_num; - ipu_micro_batch_size = micro_batch_size; - ipu_enable_pipelining = enable_pipelining; - ipu_batches_per_step = batches_per_step; -#else - FDWARNING << "The FastDeploy didn't compile with IPU, will force to use CPU." 
- << std::endl; - device = Device::CPU; -#endif -} - -void RuntimeOption::SetIpuConfig(bool enable_fp16, int replica_num, - float available_memory_proportion, - bool enable_half_partial) { - ipu_enable_fp16 = enable_fp16; - ipu_replica_num = replica_num; - ipu_available_memory_proportion = available_memory_proportion; - ipu_enable_half_partial = enable_half_partial; -} - -bool Runtime::Init(const RuntimeOption& _option) { - option = _option; - if (option.model_format == ModelFormat::AUTOREC) { - option.model_format = GuessModelFormat(_option.model_file); - } - if (option.backend == Backend::UNKNOWN) { - if (IsBackendAvailable(Backend::ORT)) { - option.backend = Backend::ORT; - } else if (IsBackendAvailable(Backend::PDINFER)) { - option.backend = Backend::PDINFER; - } else if (IsBackendAvailable(Backend::POROS)) { - option.backend = Backend::POROS; - } else if (IsBackendAvailable(Backend::OPENVINO)) { - option.backend = Backend::OPENVINO; - } else if (IsBackendAvailable(Backend::RKNPU2)) { - option.backend = Backend::RKNPU2; - } else if (IsBackendAvailable(Backend::SOPHGOTPU)) { - option.backend = Backend::SOPHGOTPU; - } else { - FDERROR << "Please define backend in RuntimeOption, current it's " - "Backend::UNKNOWN." - << std::endl; - return false; - } - } - - if (option.backend == Backend::ORT) { - FDASSERT(option.device == Device::CPU || option.device == Device::GPU, - "Backend::ORT only supports Device::CPU/Device::GPU."); - CreateOrtBackend(); - FDINFO << "Runtime initialized with Backend::ORT in " << Str(option.device) - << "." << std::endl; - } else if (option.backend == Backend::TRT) { - FDASSERT(option.device == Device::GPU, - "Backend::TRT only supports Device::GPU."); - CreateTrtBackend(); - FDINFO << "Runtime initialized with Backend::TRT in " << Str(option.device) - << "." << std::endl; - } else if (option.backend == Backend::PDINFER) { - FDASSERT( - option.device == Device::CPU || option.device == Device::GPU || - option.device == Device::IPU, - "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU."); - FDASSERT( - option.model_format == ModelFormat::PADDLE, - "Backend::PDINFER only supports model format of ModelFormat::PADDLE."); - CreatePaddleBackend(); - FDINFO << "Runtime initialized with Backend::PDINFER in " - << Str(option.device) << "." << std::endl; - } else if (option.backend == Backend::POROS) { - FDASSERT(option.device == Device::CPU || option.device == Device::GPU, - "Backend::POROS only supports Device::CPU/Device::GPU."); - FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT, - "Backend::POROS only supports model format of " - "ModelFormat::TORCHSCRIPT."); - FDINFO << "Runtime initialized with Backend::POROS in " - << Str(option.device) << "." << std::endl; - return true; - } else if (option.backend == Backend::OPENVINO) { - FDASSERT(option.device == Device::CPU, - "Backend::OPENVINO only supports Device::CPU"); - CreateOpenVINOBackend(); - FDINFO << "Runtime initialized with Backend::OPENVINO in " - << Str(option.device) << "." << std::endl; - } else if (option.backend == Backend::LITE) { - FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX || option.device == Device::KUNLUNXIN || option.device == Device::ASCEND, - "Backend::LITE only supports Device::CPU/Device::TIMVX/Device::KUNLUNXIN."); - CreateLiteBackend(); - FDINFO << "Runtime initialized with Backend::LITE in " << Str(option.device) - << "." 
<< std::endl; - } else if (option.backend == Backend::RKNPU2) { - FDASSERT(option.device == Device::RKNPU, - "Backend::RKNPU2 only supports Device::RKNPU2"); - CreateRKNPU2Backend(); - - FDINFO << "Runtime initialized with Backend::RKNPU2 in " - << Str(option.device) << "." << std::endl; - } else if (option.backend == Backend::SOPHGOTPU) { - FDASSERT(option.device == Device::SOPHGOTPUD, - "Backend::SOPHGO only supports Device::SOPHGO"); - CreateSophgoNPUBackend(); - - FDINFO << "Runtime initialized with Backend::SOPHGO in " - << Str(option.device) << "." << std::endl; - } - else { - FDERROR << "Runtime only support " - "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as " - "backend now." - << std::endl; - return false; - } - return true; -} - -TensorInfo Runtime::GetInputInfo(int index) { - return backend_->GetInputInfo(index); -} - -TensorInfo Runtime::GetOutputInfo(int index) { - return backend_->GetOutputInfo(index); -} - -std::vector Runtime::GetInputInfos() { - return backend_->GetInputInfos(); -} - -std::vector Runtime::GetOutputInfos() { - return backend_->GetOutputInfos(); -} - -bool Runtime::Infer(std::vector& input_tensors, - std::vector* output_tensors) { - for (auto& tensor : input_tensors) { - FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id, - "Device id of input tensor(%d) and runtime(%d) are not same.", - tensor.device_id, option.device_id); - } - return backend_->Infer(input_tensors, output_tensors); -} - -bool Runtime::Infer() { - bool result = backend_->Infer(input_tensors_, &output_tensors_, false); - for (auto& tensor : output_tensors_) { - tensor.device_id = option.device_id; - } - return result; -} - -void Runtime::BindInputTensor(const std::string& name, FDTensor& input) { - bool is_exist = false; - for (auto& t : input_tensors_) { - if (t.name == name) { - is_exist = true; - t.SetExternalData(input.shape, input.dtype, input.MutableData(), - input.device, input.device_id); - break; - } - } - if (!is_exist) { - FDTensor new_tensor(name); - new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(), - input.device, input.device_id); - input_tensors_.emplace_back(std::move(new_tensor)); - } -} - -FDTensor* Runtime::GetOutputTensor(const std::string& name) { - for (auto& t : output_tensors_) { - if (t.name == name) { - return &t; - } - } - FDWARNING << "The output name [" << name << "] don't exist." << std::endl; - return nullptr; -} - -void Runtime::CreatePaddleBackend() { -#ifdef ENABLE_PADDLE_BACKEND - auto pd_option = PaddleBackendOption(); - pd_option.model_file = option.model_file; - pd_option.params_file = option.params_file; - pd_option.enable_mkldnn = option.pd_enable_mkldnn; - pd_option.enable_log_info = option.pd_enable_log_info; - pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size; - pd_option.use_gpu = (option.device == Device::GPU) ? true : false; - pd_option.use_ipu = (option.device == Device::IPU) ? 
true : false; - pd_option.gpu_id = option.device_id; - pd_option.delete_pass_names = option.pd_delete_pass_names; - pd_option.cpu_thread_num = option.cpu_thread_num; - pd_option.enable_pinned_memory = option.enable_pinned_memory; - pd_option.external_stream_ = option.external_stream_; - pd_option.model_from_memory_ = option.model_from_memory_; - if (pd_option.model_from_memory_) { - pd_option.model_buffer_ = option.model_buffer_; - pd_option.params_buffer_ = option.params_buffer_; - pd_option.model_buffer_size_ = option.model_buffer_size_; - pd_option.params_buffer_size_ = option.params_buffer_size_; - } -#ifdef ENABLE_TRT_BACKEND - if (pd_option.use_gpu && option.pd_enable_trt) { - pd_option.enable_trt = true; - pd_option.collect_shape = option.pd_collect_shape; - auto trt_option = TrtBackendOption(); - trt_option.gpu_id = option.device_id; - trt_option.enable_fp16 = option.trt_enable_fp16; - trt_option.max_batch_size = option.trt_max_batch_size; - trt_option.max_workspace_size = option.trt_max_workspace_size; - trt_option.max_shape = option.trt_max_shape; - trt_option.min_shape = option.trt_min_shape; - trt_option.opt_shape = option.trt_opt_shape; - trt_option.serialize_file = option.trt_serialize_file; - trt_option.enable_pinned_memory = option.enable_pinned_memory; - pd_option.trt_option = trt_option; - pd_option.trt_disabled_ops_ = option.trt_disabled_ops_; - } -#endif -#ifdef WITH_IPU - if (pd_option.use_ipu) { - auto ipu_option = IpuOption(); - ipu_option.ipu_device_num = option.ipu_device_num; - ipu_option.ipu_micro_batch_size = option.ipu_micro_batch_size; - ipu_option.ipu_enable_pipelining = option.ipu_enable_pipelining; - ipu_option.ipu_batches_per_step = option.ipu_batches_per_step; - ipu_option.ipu_enable_fp16 = option.ipu_enable_fp16; - ipu_option.ipu_replica_num = option.ipu_replica_num; - ipu_option.ipu_available_memory_proportion = - option.ipu_available_memory_proportion; - ipu_option.ipu_enable_half_partial = option.ipu_enable_half_partial; - pd_option.ipu_option = ipu_option; - } -#endif - FDASSERT(option.model_format == ModelFormat::PADDLE, - "PaddleBackend only support model format of ModelFormat::PADDLE."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - if (pd_option.model_from_memory_) { - FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_, option.params_buffer_, - pd_option), - "Load model from Paddle failed while initliazing PaddleBackend."); - } else { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file, - pd_option), - "Load model from Paddle failed while initliazing PaddleBackend."); - } -#else - FDASSERT(false, "PaddleBackend is not available, please compiled with " - "ENABLE_PADDLE_BACKEND=ON."); -#endif -} - -void Runtime::CreateOpenVINOBackend() { -#ifdef ENABLE_OPENVINO_BACKEND - auto ov_option = OpenVINOBackendOption(); - ov_option.cpu_thread_num = option.cpu_thread_num; - ov_option.device = option.openvino_device; - ov_option.shape_infos = option.ov_shape_infos; - ov_option.num_streams = option.ov_num_streams; - for (const auto& op : option.ov_cpu_operators) { - ov_option.cpu_operators.insert(op); - } - FDASSERT(option.model_format == ModelFormat::PADDLE || - option.model_format == ModelFormat::ONNX, - "OpenVINOBackend only support model format of ModelFormat::PADDLE / " - "ModelFormat::ONNX."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - - if (option.model_format == ModelFormat::ONNX) { - 
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ov_option), - "Load model from ONNX failed while initliazing OrtBackend."); - } else { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, - option.params_file, ov_option), - "Load model from Paddle failed while initliazing OrtBackend."); - } -#else - FDASSERT(false, "OpenVINOBackend is not available, please compiled with " - "ENABLE_OPENVINO_BACKEND=ON."); -#endif -} - -void Runtime::CreateOrtBackend() { -#ifdef ENABLE_ORT_BACKEND - auto ort_option = OrtBackendOption(); - ort_option.graph_optimization_level = option.ort_graph_opt_level; - ort_option.intra_op_num_threads = option.cpu_thread_num; - ort_option.inter_op_num_threads = option.ort_inter_op_num_threads; - ort_option.execution_mode = option.ort_execution_mode; - ort_option.use_gpu = (option.device == Device::GPU) ? true : false; - ort_option.gpu_id = option.device_id; - ort_option.external_stream_ = option.external_stream_; - - FDASSERT(option.model_format == ModelFormat::PADDLE || - option.model_format == ModelFormat::ONNX, - "OrtBackend only support model format of ModelFormat::PADDLE / " - "ModelFormat::ONNX."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - if (option.model_format == ModelFormat::ONNX) { - FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option), - "Load model from ONNX failed while initliazing OrtBackend."); - } else { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, - option.params_file, ort_option), - "Load model from Paddle failed while initliazing OrtBackend."); - } -#else - FDASSERT(false, "OrtBackend is not available, please compiled with " - "ENABLE_ORT_BACKEND=ON."); -#endif -} - -void Runtime::CreateTrtBackend() { -#ifdef ENABLE_TRT_BACKEND - auto trt_option = TrtBackendOption(); - trt_option.model_file = option.model_file; - trt_option.params_file = option.params_file; - trt_option.model_format = option.model_format; - trt_option.gpu_id = option.device_id; - trt_option.enable_fp16 = option.trt_enable_fp16; - trt_option.enable_int8 = option.trt_enable_int8; - trt_option.max_batch_size = option.trt_max_batch_size; - trt_option.max_workspace_size = option.trt_max_workspace_size; - trt_option.max_shape = option.trt_max_shape; - trt_option.min_shape = option.trt_min_shape; - trt_option.opt_shape = option.trt_opt_shape; - trt_option.serialize_file = option.trt_serialize_file; - trt_option.enable_pinned_memory = option.enable_pinned_memory; - trt_option.external_stream_ = option.external_stream_; - - FDASSERT(option.model_format == ModelFormat::PADDLE || - option.model_format == ModelFormat::ONNX, - "TrtBackend only support model format of ModelFormat::PADDLE / " - "ModelFormat::ONNX."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - if (option.model_format == ModelFormat::ONNX) { - FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option), - "Load model from ONNX failed while initliazing TrtBackend."); - } else { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, - option.params_file, trt_option), - "Load model from Paddle failed while initliazing TrtBackend."); - } -#else - FDASSERT(false, "TrtBackend is not available, please compiled with " - "ENABLE_TRT_BACKEND=ON."); -#endif -} - -void Runtime::CreateLiteBackend() { -#ifdef ENABLE_LITE_BACKEND - auto lite_option = LiteBackendOption(); - lite_option.threads = option.cpu_thread_num; - lite_option.enable_int8 = option.lite_enable_int8; - 
lite_option.enable_fp16 = option.lite_enable_fp16; - lite_option.power_mode = static_cast(option.lite_power_mode); - lite_option.optimized_model_dir = option.lite_optimized_model_dir; - lite_option.nnadapter_subgraph_partition_config_path = option.lite_nnadapter_subgraph_partition_config_path; - lite_option.nnadapter_subgraph_partition_config_buffer = option.lite_nnadapter_subgraph_partition_config_buffer; - lite_option.nnadapter_device_names = option.lite_nnadapter_device_names; - lite_option.nnadapter_context_properties = option.lite_nnadapter_context_properties; - lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir; - lite_option.nnadapter_dynamic_shape_info = option.lite_nnadapter_dynamic_shape_info; - lite_option.nnadapter_mixed_precision_quantization_config_path = option.lite_nnadapter_mixed_precision_quantization_config_path; - lite_option.enable_timvx = option.enable_timvx; - lite_option.enable_ascend = option.enable_ascend; - lite_option.enable_kunlunxin = option.enable_kunlunxin; - lite_option.device_id = option.device_id; - lite_option.kunlunxin_l3_workspace_size = option.kunlunxin_l3_workspace_size; - lite_option.kunlunxin_locked = option.kunlunxin_locked; - lite_option.kunlunxin_autotune = option.kunlunxin_autotune; - lite_option.kunlunxin_autotune_file = option.kunlunxin_autotune_file; - lite_option.kunlunxin_precision = option.kunlunxin_precision; - lite_option.kunlunxin_adaptive_seqlen = option.kunlunxin_adaptive_seqlen; - lite_option.kunlunxin_enable_multi_stream = option.kunlunxin_enable_multi_stream; - - FDASSERT(option.model_format == ModelFormat::PADDLE, - "LiteBackend only support model format of ModelFormat::PADDLE"); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file, - lite_option), - "Load model from nb file failed while initializing LiteBackend."); -#else - FDASSERT(false, "LiteBackend is not available, please compiled with " - "ENABLE_LITE_BACKEND=ON."); -#endif -} - -void Runtime::CreateRKNPU2Backend() { -#ifdef ENABLE_RKNPU2_BACKEND - auto rknpu2_option = RKNPU2BackendOption(); - rknpu2_option.cpu_name = option.rknpu2_cpu_name_; - rknpu2_option.core_mask = option.rknpu2_core_mask_; - FDASSERT(option.model_format == ModelFormat::RKNN, - "RKNPU2Backend only support model format of ModelFormat::RKNN"); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option), - "Load model from nb file failed while initializing LiteBackend."); -#else - FDASSERT(false, "RKNPU2Backend is not available, please compiled with " - "ENABLE_RKNPU2_BACKEND=ON."); -#endif -} - -void Runtime::CreateSophgoNPUBackend() { -#ifdef ENABLE_SOPHGO_BACKEND - auto sophgo_option = SophgoBackendOption(); - FDASSERT(option.model_format == ModelFormat::SOPHGO, - "SophgoBackend only support model format of ModelFormat::SOPHGO"); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option), - "Load model from nb file failed while initializing LiteBackend."); -#else - FDASSERT(false, "SophgoBackend is not available, please compiled with " - "ENABLE_SOPHGO_BACKEND=ON."); -#endif -} - -Runtime* Runtime::Clone(void* stream, int device_id) { - Runtime* runtime = new Runtime(); - if (option.backend != Backend::OPENVINO && - option.backend != Backend::PDINFER && 
option.backend != Backend::TRT) { - runtime->Init(option); - FDWARNING << "Only OpenVINO/Paddle Inference/TensorRT support \ - clone engine to reduce CPU/GPU memory usage now. For " - << option.backend - << ", FastDeploy will create a new engine which \ - will not share memory with the current runtime." - << std::endl; - return runtime; - } - FDINFO << "Runtime Clone with Backend:: " << Str(option.backend) << " in " - << Str(option.device) << "." << std::endl; - runtime->option = option; - runtime->backend_ = backend_->Clone(stream, device_id); - return runtime; -} - -} // namespace fastdeploy diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h index 46532b16b..f6c75fe8d 100755 --- a/fastdeploy/runtime.h +++ b/fastdeploy/runtime.h @@ -19,573 +19,5 @@ */ #pragma once - -#include -#include -#include - -#include "backends/rknpu/rknpu2/rknpu2_config.h" -#include "fastdeploy/backends/backend.h" -#include "fastdeploy/utils/perf.h" - -/** \brief All C++ FastDeploy APIs are defined inside this namespace -* -*/ -namespace fastdeploy { - -/*! Inference backend supported in FastDeploy */ -enum Backend { - UNKNOWN, ///< Unknown inference backend - ORT, ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU - TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only - PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU - POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU - OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only - LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only - RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only - SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only -}; - -FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, - const Backend& backend); - -/*! Paddle Lite power mode for mobile device. */ -enum LitePowerMode { - LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode - LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode - LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode - LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode - LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode - LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode -}; - -FASTDEPLOY_DECL std::string Str(const Backend& b); -FASTDEPLOY_DECL std::string Str(const ModelFormat& f); - -/** - * @brief Get all the available inference backend in FastDeploy - */ -FASTDEPLOY_DECL std::vector GetAvailableBackends(); - -/** - * @brief Check if the inference backend available - */ -FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend); - -bool CheckModelFormat(const std::string& model_file, - const ModelFormat& model_format); -ModelFormat GuessModelFormat(const std::string& model_file); - -/*! @brief Option object used when create a new Runtime object - */ -struct FASTDEPLOY_DECL RuntimeOption { - /** \brief Set path of model file and parameter file - * - * \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model - * \param[in] params_path Path of parameter file, this only used when the model format is Paddle, e.g Resnet50/model.pdiparams - * \param[in] format Format of the loaded model - */ - void SetModelPath(const std::string& model_path, - const std::string& params_path = "", - const ModelFormat& format = ModelFormat::PADDLE); - - /** \brief Specify the memory buffer of model and parameter. 
Used when model and params are loaded directly from memory - * - * \param[in] model_buffer The memory buffer of model - * \param[in] model_buffer_size The size of the model data - * \param[in] params_buffer The memory buffer of the combined parameters file - * \param[in] params_buffer_size The size of the combined parameters data - * \param[in] format Format of the loaded model - */ - void SetModelBuffer(const char * model_buffer, - size_t model_buffer_size, - const char * params_buffer, - size_t params_buffer_size, - const ModelFormat& format = ModelFormat::PADDLE); - - /// Use cpu to inference, the runtime will inference on CPU by default - void UseCpu(); - - /// Use Nvidia GPU to inference - void UseGpu(int gpu_id = 0); - - void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name = - fastdeploy::rknpu2::CpuName::RK3588, - fastdeploy::rknpu2::CoreMask rknpu2_core = - fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0); - - /// Use TimVX to inference - void UseTimVX(); - - /// Use Huawei Ascend to inference - void UseAscend(); - - /// - /// \brief Turn on KunlunXin XPU. - /// - /// \param kunlunxin_id the KunlunXin XPU card to use (default is 0). - /// \param l3_workspace_size The size of the video memory allocated by the l3 - /// cache, the maximum is 16M. - /// \param locked Whether the allocated L3 cache can be locked. If false, - /// it means that the L3 cache is not locked, and the allocated L3 - /// cache can be shared by multiple models, and multiple models - /// sharing the L3 cache will be executed sequentially on the card. - /// \param autotune Whether to autotune the conv operator in the model. If - /// true, when the conv operator of a certain dimension is executed - /// for the first time, it will automatically search for a better - /// algorithm to improve the performance of subsequent conv operators - /// of the same dimension. - /// \param autotune_file Specify the path of the autotune file. If - /// autotune_file is specified, the algorithm specified in the - /// file will be used and autotune will not be performed again. - /// \param precision Calculation accuracy of multi_encoder - /// \param adaptive_seqlen Is the input of multi_encoder variable length - /// \param enable_multi_stream Whether to enable the multi stream of - /// KunlunXin XPU. 
- /// - void UseKunlunXin(int kunlunxin_id = 0, - int l3_workspace_size = 0xfffc00, - bool locked = false, - bool autotune = true, - const std::string& autotune_file = "", - const std::string& precision = "int16", - bool adaptive_seqlen = false, - bool enable_multi_stream = false); - - /// Use Sophgo to inference - void UseSophgo(); - - void SetExternalStream(void* external_stream); - - /* - * @brief Set number of cpu threads while inference on CPU, by default it will decided by the different backends - */ - void SetCpuThreadNum(int thread_num); - - /// Set ORT graph opt level, default is decide by ONNX Runtime itself - void SetOrtGraphOptLevel(int level = -1); - - /// Set Paddle Inference as inference backend, support CPU/GPU - void UsePaddleBackend(); - - /// Wrapper function of UsePaddleBackend() - void UsePaddleInferBackend() { return UsePaddleBackend(); } - - /// Set ONNX Runtime as inference backend, support CPU/GPU - void UseOrtBackend(); - - /// Set SOPHGO Runtime as inference backend, support CPU/GPU - void UseSophgoBackend(); - - /// Set TensorRT as inference backend, only support GPU - void UseTrtBackend(); - - /// Set Poros backend as inference backend, support CPU/GPU - void UsePorosBackend(); - - /// Set OpenVINO as inference backend, only support CPU - void UseOpenVINOBackend(); - - /// Set Paddle Lite as inference backend, only support arm cpu - void UseLiteBackend(); - - /// Wrapper function of UseLiteBackend() - void UsePaddleLiteBackend() { return UseLiteBackend(); } - - /// Set mkldnn switch while using Paddle Inference as inference backend - void SetPaddleMKLDNN(bool pd_mkldnn = true); - - /* - * @brief If TensorRT backend is used, EnablePaddleToTrt will change to use Paddle Inference backend, and use its integrated TensorRT instead. - */ - void EnablePaddleToTrt(); - - /** - * @brief Delete pass by name while using Paddle Inference as inference backend, this can be called multiple times to delete a set of passes - */ - void DeletePaddleBackendPass(const std::string& delete_pass_name); - - /** - * @brief Enable print debug information while using Paddle Inference as inference backend, the backend disable the debug information by default - */ - void EnablePaddleLogInfo(); - - /** - * @brief Disable print debug information while using Paddle Inference as inference backend - */ - void DisablePaddleLogInfo(); - - /** - * @brief Set shape cache size while using Paddle Inference with mkldnn, by default it will cache all the difference shape - */ - void SetPaddleMKLDNNCacheSize(int size); - - /** - * @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'.... - */ - void SetOpenVINODevice(const std::string& name = "CPU"); - - /** - * @brief Set shape info for OpenVINO - */ - void SetOpenVINOShapeInfo( - const std::map>& shape_info) { - ov_shape_infos = shape_info; - } - - /** - * @brief While use OpenVINO backend with intel GPU, use this interface to specify operators run on CPU - */ - void SetOpenVINOCpuOperators(const std::vector& operators) { - ov_cpu_operators = operators; - } - - /** - * @brief Set optimzed model dir for Paddle Lite backend. - */ - void SetLiteOptimizedModelDir(const std::string& optimized_model_dir); - - /** - * @brief Set subgraph partition path for Paddle Lite backend. - */ - void SetLiteSubgraphPartitionPath( - const std::string& nnadapter_subgraph_partition_config_path); - - /** - * @brief Set subgraph partition path for Paddle Lite backend. 
- */ - void SetLiteSubgraphPartitionConfigBuffer( - const std::string& nnadapter_subgraph_partition_config_buffer); - - /** - * @brief Set device name for Paddle Lite backend. - */ - void SetLiteDeviceNames( - const std::vector& nnadapter_device_names); - - /** - * @brief Set context properties for Paddle Lite backend. - */ - void SetLiteContextProperties( - const std::string& nnadapter_context_properties); - - /** - * @brief Set model cache dir for Paddle Lite backend. - */ - void SetLiteModelCacheDir( - const std::string& nnadapter_model_cache_dir); - - /** - * @brief Set dynamic shape info for Paddle Lite backend. - */ - void SetLiteDynamicShapeInfo( - const std::map>>& - nnadapter_dynamic_shape_info); - - /** - * @brief Set mixed precision quantization config path for Paddle Lite backend. - */ - void SetLiteMixedPrecisionQuantizationConfigPath( - const std::string& nnadapter_mixed_precision_quantization_config_path); - - /** - * @brief enable half precision while use paddle lite backend - */ - void EnableLiteFP16(); - - /** - * @brief disable half precision, change to full precision(float32) - */ - void DisableLiteFP16(); - - /** - * @brief enable int8 precision while use paddle lite backend - */ - void EnableLiteInt8(); - - /** - * @brief disable int8 precision, change to full precision(float32) - */ - void DisableLiteInt8(); - - /** - * @brief Set power mode while using Paddle Lite as inference backend, mode(0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND, 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW, refer [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details) - */ - void SetLitePowerMode(LitePowerMode mode); - - /** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend - * - * \param[in] input_name The name of input for the model which is dynamic shape - * \param[in] min_shape The minimal shape for the input tensor - * \param[in] opt_shape The optimized shape for the input tensor, just set the most common shape, if set as default value, it will keep same with min_shape - * \param[in] max_shape The maximum shape for the input tensor, if set as default value, it will keep same with min_shape - */ - void SetTrtInputShape( - const std::string& input_name, const std::vector& min_shape, - const std::vector& opt_shape = std::vector(), - const std::vector& max_shape = std::vector()); - - /// Set max_workspace_size for TensorRT, default 1<<30 - void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size); - - /// Set max_batch_size for TensorRT, default 32 - void SetTrtMaxBatchSize(size_t max_batch_size); - - /** - * @brief Enable FP16 inference while using TensorRT backend. Notice: not all the GPU device support FP16, on those device doesn't support FP16, FastDeploy will fallback to FP32 automaticly - */ - void EnableTrtFP16(); - - /// Disable FP16 inference while using TensorRT backend - void DisableTrtFP16(); - - /** - * @brief Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again - */ - void SetTrtCacheFile(const std::string& cache_file_path); - - /** - * @brief Enable pinned memory. Pinned memory can be utilized to speedup the data transfer between CPU and GPU. Currently it's only suppurted in TRT backend and Paddle Inference backend. 
- */ - void EnablePinnedMemory(); - - /** - * @brief Disable pinned memory - */ - void DisablePinnedMemory(); - - /** - * @brief Enable to collect shape in paddle trt backend - */ - void EnablePaddleTrtCollectShape(); - - /** - * @brief Disable to collect shape in paddle trt backend - */ - void DisablePaddleTrtCollectShape(); - - /** - * @brief Prevent ops running in paddle trt backend - */ - void DisablePaddleTrtOPs(const std::vector& ops); - - /* - * @brief Set number of streams by the OpenVINO backends - */ - void SetOpenVINOStreams(int num_streams); - - /** \Use Graphcore IPU to inference. - * - * \param[in] device_num the number of IPUs. - * \param[in] micro_batch_size the batch size in the graph, only work when graph has no batch shape info. - * \param[in] enable_pipelining enable pipelining. - * \param[in] batches_per_step the number of batches per run in pipelining. - */ - void UseIpu(int device_num = 1, int micro_batch_size = 1, - bool enable_pipelining = false, int batches_per_step = 1); - - /** \brief Set IPU config. - * - * \param[in] enable_fp16 enable fp16. - * \param[in] replica_num the number of graph replication. - * \param[in] available_memory_proportion the available memory proportion for matmul/conv. - * \param[in] enable_half_partial enable fp16 partial for matmul, only work with fp16. - */ - void SetIpuConfig(bool enable_fp16 = false, int replica_num = 1, - float available_memory_proportion = 1.0, - bool enable_half_partial = false); - - Backend backend = Backend::UNKNOWN; - // for cpu inference and preprocess - // default will let the backend choose their own default value - int cpu_thread_num = -1; - int device_id = 0; - - Device device = Device::CPU; - - void* external_stream_ = nullptr; - - bool enable_pinned_memory = false; - - // ======Only for ORT Backend======== - // -1 means use default value by ort - // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3: - // ORT_ENABLE_ALL - int ort_graph_opt_level = -1; - int ort_inter_op_num_threads = -1; - // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL - int ort_execution_mode = -1; - - // ======Only for Paddle Backend===== - bool pd_enable_mkldnn = true; - bool pd_enable_log_info = false; - bool pd_enable_trt = false; - bool pd_collect_shape = false; - int pd_mkldnn_cache_size = 1; - std::vector pd_delete_pass_names; - - // ======Only for Paddle IPU Backend ======= - int ipu_device_num = 1; - int ipu_micro_batch_size = 1; - bool ipu_enable_pipelining = false; - int ipu_batches_per_step = 1; - bool ipu_enable_fp16 = false; - int ipu_replica_num = 1; - float ipu_available_memory_proportion = 1.0; - bool ipu_enable_half_partial = false; - - // ======Only for Paddle Lite Backend===== - // 0: LITE_POWER_HIGH 1: LITE_POWER_LOW 2: LITE_POWER_FULL - // 3: LITE_POWER_NO_BIND 4: LITE_POWER_RAND_HIGH - // 5: LITE_POWER_RAND_LOW - LitePowerMode lite_power_mode = LitePowerMode::LITE_POWER_NO_BIND; - // enable int8 or not - bool lite_enable_int8 = false; - // enable fp16 or not - bool lite_enable_fp16 = false; - // optimized model dir for CxxConfig - std::string lite_optimized_model_dir = ""; - std::string lite_nnadapter_subgraph_partition_config_path = ""; - // and other nnadapter settings for CxxConfig - std::string lite_nnadapter_subgraph_partition_config_buffer = ""; - std::string lite_nnadapter_context_properties = ""; - std::string lite_nnadapter_model_cache_dir = ""; - std::string lite_nnadapter_mixed_precision_quantization_config_path = ""; - std::map>> - lite_nnadapter_dynamic_shape_info = {{"", {{0}}}}; - std::vector 
lite_nnadapter_device_names = {}; - - bool enable_timvx = false; - bool enable_ascend = false; - bool enable_kunlunxin = false; - - // ======Only for Trt Backend======= - std::map> trt_max_shape; - std::map> trt_min_shape; - std::map> trt_opt_shape; - std::string trt_serialize_file = ""; - bool trt_enable_fp16 = false; - bool trt_enable_int8 = false; - size_t trt_max_batch_size = 1; - size_t trt_max_workspace_size = 1 << 30; - // ======Only for PaddleTrt Backend======= - std::vector trt_disabled_ops_{}; - - // ======Only for Poros Backend======= - bool is_dynamic = false; - bool long_to_int = true; - bool use_nvidia_tf32 = false; - int unconst_ops_thres = -1; - std::string poros_file = ""; - - // ======Only for OpenVINO Backend======= - int ov_num_streams = 0; - std::string openvino_device = "CPU"; - std::map> ov_shape_infos; - std::vector ov_cpu_operators; - - // ======Only for RKNPU2 Backend======= - fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ = - fastdeploy::rknpu2::CpuName::RK3588; - fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ = - fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO; - - // ======Only for KunlunXin XPU Backend======= - int kunlunxin_l3_workspace_size = 0xfffc00; - bool kunlunxin_locked = false; - bool kunlunxin_autotune = true; - std::string kunlunxin_autotune_file = ""; - std::string kunlunxin_precision = "int16"; - bool kunlunxin_adaptive_seqlen = false; - bool kunlunxin_enable_multi_stream = false; - - std::string model_file = ""; // Path of model file - std::string params_file = ""; // Path of parameters file, can be empty - // format of input model - ModelFormat model_format = ModelFormat::AUTOREC; - - std::string model_buffer_ = ""; - std::string params_buffer_ = ""; - size_t model_buffer_size_ = 0; - size_t params_buffer_size_ = 0; - bool model_from_memory_ = false; -}; - -/*! @brief Runtime object used to inference the loaded model on different devices - */ -struct FASTDEPLOY_DECL Runtime { - public: - /// Intialize a Runtime object with RuntimeOption - bool Init(const RuntimeOption& _option); - - /** \brief Inference the model by the input data, and write to the output - * - * \param[in] input_tensors Notice the FDTensor::name should keep same with the model's input - * \param[in] output_tensors Inference results - * \return true if the inference successed, otherwise false - */ - bool Infer(std::vector& input_tensors, - std::vector* output_tensors); - - /** \brief No params inference the model. - * - * the input and output data need to pass through the BindInputTensor and GetOutputTensor interfaces. 
- */ - bool Infer(); - - /** \brief Compile TorchScript Module, only for Poros backend - * - * \param[in] prewarm_tensors Prewarm datas for compile - * \param[in] _option Runtime option - * \return true if compile successed, otherwise false - */ - bool Compile(std::vector>& prewarm_tensors, - const RuntimeOption& _option); - - /** \brief Get number of inputs - */ - int NumInputs() { return backend_->NumInputs(); } - /** \brief Get number of outputs - */ - int NumOutputs() { return backend_->NumOutputs(); } - /** \brief Get input information by index - */ - TensorInfo GetInputInfo(int index); - /** \brief Get output information by index - */ - TensorInfo GetOutputInfo(int index); - /** \brief Get all the input information - */ - std::vector GetInputInfos(); - /** \brief Get all the output information - */ - std::vector GetOutputInfos(); - /** \brief Bind FDTensor by name, no copy and share input memory - */ - void BindInputTensor(const std::string& name, FDTensor& input); - /** \brief Get output FDTensor by name, no copy and share backend output memory - */ - FDTensor* GetOutputTensor(const std::string& name); - - /** \brief Clone new Runtime when multiple instances of the same model are created - * - * \param[in] stream CUDA Stream, defualt param is nullptr - * \return new Runtime* by this clone - */ - Runtime* Clone(void* stream = nullptr, int device_id = -1); - - RuntimeOption option; - - private: - void CreateOrtBackend(); - void CreatePaddleBackend(); - void CreateTrtBackend(); - void CreateOpenVINOBackend(); - void CreateLiteBackend(); - void CreateRKNPU2Backend(); - void CreateSophgoNPUBackend(); - std::unique_ptr backend_; - std::vector input_tensors_; - std::vector output_tensors_; -}; -} // namespace fastdeploy +#include "fastdeploy/core/config.h" +#include "fastdeploy/runtime/runtime.h" diff --git a/fastdeploy/runtime/enum_variables.cc b/fastdeploy/runtime/enum_variables.cc new file mode 100644 index 000000000..ed7b87ba6 --- /dev/null +++ b/fastdeploy/runtime/enum_variables.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
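// ---------------------------------------------------------------------------
// [Editor's note: usage sketch, not part of this patch.] The Runtime API
// removed above (and re-introduced under fastdeploy/runtime/ below) is driven
// as in this minimal example. The model path and the input name "x" are
// placeholders; FDDataType::FP32 and a CPU buffer are assumed to match the
// model's first input.
#include "fastdeploy/runtime.h"
#include <vector>

int main() {
  fastdeploy::RuntimeOption opt;
  opt.SetModelPath("model.pdmodel", "model.pdiparams");  // Paddle format
  opt.UseCpu();
  opt.UseOrtBackend();

  fastdeploy::Runtime runtime;
  if (!runtime.Init(opt)) return -1;

  std::vector<float> data(1 * 3 * 224 * 224, 0.0f);
  std::vector<fastdeploy::FDTensor> inputs(1), outputs;
  inputs[0].name = "x";  // must match the model's input name
  inputs[0].SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
                            data.data(), fastdeploy::Device::CPU);
  return runtime.Infer(inputs, &outputs) ? 0 : -1;
}
// ---------------------------------------------------------------------------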
+ +#include "fastdeploy/runtime/enum_variables.h" + +namespace fastdeploy { +std::ostream& operator<<(std::ostream& out, const Backend& backend) { + if (backend == Backend::ORT) { + out << "Backend::ORT"; + } else if (backend == Backend::TRT) { + out << "Backend::TRT"; + } else if (backend == Backend::PDINFER) { + out << "Backend::PDINFER"; + } else if (backend == Backend::OPENVINO) { + out << "Backend::OPENVINO"; + } else if (backend == Backend::RKNPU2) { + out << "Backend::RKNPU2"; + } else if (backend == Backend::SOPHGOTPU) { + out << "Backend::SOPHGOTPU"; + } else if (backend == Backend::POROS) { + out << "Backend::POROS"; + } else if (backend == Backend::LITE) { + out << "Backend::PDLITE"; + } else { + out << "UNKNOWN-Backend"; + } + return out; +} + +std::ostream& operator<<(std::ostream& out, const Device& d) { + switch (d) { + case Device::CPU: + out << "Device::CPU"; + break; + case Device::GPU: + out << "Device::GPU"; + break; + case Device::RKNPU: + out << "Device::RKNPU"; + break; + case Device::SOPHGOTPUD: + out << "Device::SOPHGOTPUD"; + break; + case Device::TIMVX: + out << "Device::TIMVX"; + break; + case Device::KUNLUNXIN: + out << "Device::KUNLUNXIN"; + break; + case Device::ASCEND: + out << "Device::ASCEND"; + break; + default: + out << "Device::UNKOWN"; + } + return out; +} + +std::ostream& operator<<(std::ostream& out, const ModelFormat& format) { + if (format == ModelFormat::PADDLE) { + out << "ModelFormat::PADDLE"; + } else if (format == ModelFormat::ONNX) { + out << "ModelFormat::ONNX"; + } else if (format == ModelFormat::RKNN) { + out << "ModelFormat::RKNN"; + } else if (format == ModelFormat::SOPHGO) { + out << "ModelFormat::SOPHGO"; + } else if (format == ModelFormat::TORCHSCRIPT) { + out << "ModelFormat::TORCHSCRIPT"; + } + out << "UNKNOWN-ModelFormat"; + return out; +} +} // namespace fastdeploy diff --git a/fastdeploy/runtime/enum_variables.h b/fastdeploy/runtime/enum_variables.h new file mode 100644 index 000000000..bfcdd7eef --- /dev/null +++ b/fastdeploy/runtime/enum_variables.h @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file enum_variables.h + \brief A brief file description. + + More details + */ + +#pragma once +#include "fastdeploy/utils/utils.h" +#include +#include + +namespace fastdeploy { + +/*! 
Inference backend supported in FastDeploy */ +enum Backend { + UNKNOWN, ///< Unknown inference backend + ORT, ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU + TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only + PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU + POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU + OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only + LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only + RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only + SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only +}; + +enum FASTDEPLOY_DECL Device { + CPU, + GPU, + RKNPU, + IPU, + TIMVX, + KUNLUNXIN, + ASCEND, + SOPHGOTPUD +}; + +/*! Deep learning model format */ +enum ModelFormat { + AUTOREC, ///< Auto recognize the model format by model file name + PADDLE, ///< Model with paddlepaddle format + ONNX, ///< Model with ONNX format + RKNN, ///< Model with RKNN format + TORCHSCRIPT, ///< Model with TorchScript format + SOPHGO, ///< Model with SOPHGO format +}; + +/// Describle all the supported backends for specified model format +static std::map> s_default_backends_cfg = { + {ModelFormat::PADDLE, {Backend::PDINFER, Backend::LITE, + Backend::ORT, Backend::OPENVINO, Backend::TRT}}, + {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}}, + {ModelFormat::RKNN, {Backend::RKNPU2}}, + {ModelFormat::TORCHSCRIPT, {Backend::POROS}}, + {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}} +}; + +FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Backend& b); + +FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d); + +FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, + const ModelFormat& f); + +} // namespace fastdeploy diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc new file mode 100644 index 000000000..bb825c8b9 --- /dev/null +++ b/fastdeploy/runtime/runtime.cc @@ -0,0 +1,492 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
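// ---------------------------------------------------------------------------
// [Editor's note: usage sketch, not part of this patch.] The enums above plus
// GetAvailableBackends()/IsBackendAvailable() (defined in runtime_option.cc
// later in this patch) let callers probe what the current build supports
// before picking a backend; the stream operators come from enum_variables.cc.
#include "fastdeploy/runtime.h"
#include <iostream>

void PrintBackends() {
  for (const auto& b : fastdeploy::GetAvailableBackends()) {
    std::cout << b << std::endl;                     // e.g. "Backend::ORT"
  }
  if (fastdeploy::IsBackendAvailable(fastdeploy::Backend::TRT)) {
    // Safe to call RuntimeOption::UseTrtBackend() with this build.
  }
}
// ---------------------------------------------------------------------------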
+ +#include "fastdeploy/runtime/runtime.h" + +#include "fastdeploy/utils/unique_ptr.h" +#include "fastdeploy/utils/utils.h" + +#ifdef ENABLE_ORT_BACKEND +#include "fastdeploy/backends/ort/ort_backend.h" +#endif + +#ifdef ENABLE_TRT_BACKEND +#include "fastdeploy/backends/tensorrt/trt_backend.h" +#endif + +#ifdef ENABLE_PADDLE_BACKEND +#include "fastdeploy/backends/paddle/paddle_backend.h" +#endif + +#ifdef ENABLE_POROS_BACKEND +#include "fastdeploy/backends/poros/poros_backend.h" +#endif + +#ifdef ENABLE_OPENVINO_BACKEND +#include "fastdeploy/backends/openvino/ov_backend.h" +#endif + +#ifdef ENABLE_LITE_BACKEND +#include "fastdeploy/backends/lite/lite_backend.h" +#endif + +#ifdef ENABLE_RKNPU2_BACKEND +#include "fastdeploy/backends/rknpu2/rknpu2_backend.h" +#endif + +#ifdef ENABLE_SOPHGO_BACKEND +#include "fastdeploy/backends/sophgo/sophgo_backend.h" +#endif + +namespace fastdeploy { + +bool Runtime::Init(const RuntimeOption& _option) { + option = _option; + // Choose default backend by model format + if (option.backend == Backend::UNKNOWN) { + auto iter = s_default_backends_cfg.find(option.model_format); + if (iter == s_default_backends_cfg.end()) { + FDERROR << "Cannot found a default backend for model format: " + << option.model_format + << ", please define the inference backend in RuntimeOption." + << std::endl; + return false; + } + for (const auto& b : iter->second) { + if (IsBackendAvailable(b)) { + option.backend = b; + FDINFO << "FastDeploy will choose " << b << " to inference this model." + << std::endl; + } + } + if (option.backend == Backend::UNKNOWN) { + FDERROR << "Cannot found available backends for model format: " + << option.model_format << "." << std::endl; + return false; + } + } + + if (option.backend == Backend::ORT) { + FDASSERT(option.device == Device::CPU || option.device == Device::GPU, + "Backend::ORT only supports Device::CPU/Device::GPU."); + CreateOrtBackend(); + FDINFO << "Runtime initialized with Backend::ORT in " << option.device + << "." << std::endl; + } else if (option.backend == Backend::TRT) { + FDASSERT(option.device == Device::GPU, + "Backend::TRT only supports Device::GPU."); + CreateTrtBackend(); + FDINFO << "Runtime initialized with Backend::TRT in " << option.device + << "." << std::endl; + } else if (option.backend == Backend::PDINFER) { + FDASSERT( + option.device == Device::CPU || option.device == Device::GPU || + option.device == Device::IPU, + "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU."); + FDASSERT( + option.model_format == ModelFormat::PADDLE, + "Backend::PDINFER only supports model format of ModelFormat::PADDLE."); + CreatePaddleBackend(); + FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device + << "." << std::endl; + } else if (option.backend == Backend::POROS) { + FDASSERT(option.device == Device::CPU || option.device == Device::GPU, + "Backend::POROS only supports Device::CPU/Device::GPU."); + FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT, + "Backend::POROS only supports model format of " + "ModelFormat::TORCHSCRIPT."); + FDINFO << "Runtime initialized with Backend::POROS in " << option.device + << "." << std::endl; + return true; + } else if (option.backend == Backend::OPENVINO) { + FDASSERT(option.device == Device::CPU, + "Backend::OPENVINO only supports Device::CPU"); + CreateOpenVINOBackend(); + FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device + << "." 
<< std::endl; + } else if (option.backend == Backend::LITE) { + FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX || + option.device == Device::KUNLUNXIN || + option.device == Device::ASCEND, + "Backend::LITE only supports " + "Device::CPU/Device::TIMVX/Device::KUNLUNXIN."); + CreateLiteBackend(); + FDINFO << "Runtime initialized with Backend::LITE in " << option.device + << "." << std::endl; + } else if (option.backend == Backend::RKNPU2) { + FDASSERT(option.device == Device::RKNPU, + "Backend::RKNPU2 only supports Device::RKNPU2"); + CreateRKNPU2Backend(); + + FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device + << "." << std::endl; + } else if (option.backend == Backend::SOPHGOTPU) { + FDASSERT(option.device == Device::SOPHGOTPUD, + "Backend::SOPHGO only supports Device::SOPHGO"); + CreateSophgoNPUBackend(); + + FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device + << "." << std::endl; + } else { + FDERROR << "Runtime only support " + "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as " + "backend now." + << std::endl; + return false; + } + return true; +} + +TensorInfo Runtime::GetInputInfo(int index) { + return backend_->GetInputInfo(index); +} + +TensorInfo Runtime::GetOutputInfo(int index) { + return backend_->GetOutputInfo(index); +} + +std::vector Runtime::GetInputInfos() { + return backend_->GetInputInfos(); +} + +std::vector Runtime::GetOutputInfos() { + return backend_->GetOutputInfos(); +} + +bool Runtime::Infer(std::vector& input_tensors, + std::vector* output_tensors) { + for (auto& tensor : input_tensors) { + FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id, + "Device id of input tensor(%d) and runtime(%d) are not same.", + tensor.device_id, option.device_id); + } + return backend_->Infer(input_tensors, output_tensors); +} + +bool Runtime::Infer() { + bool result = backend_->Infer(input_tensors_, &output_tensors_, false); + for (auto& tensor : output_tensors_) { + tensor.device_id = option.device_id; + } + return result; +} + +void Runtime::BindInputTensor(const std::string& name, FDTensor& input) { + bool is_exist = false; + for (auto& t : input_tensors_) { + if (t.name == name) { + is_exist = true; + t.SetExternalData(input.shape, input.dtype, input.MutableData(), + input.device, input.device_id); + break; + } + } + if (!is_exist) { + FDTensor new_tensor(name); + new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(), + input.device, input.device_id); + input_tensors_.emplace_back(std::move(new_tensor)); + } +} + +FDTensor* Runtime::GetOutputTensor(const std::string& name) { + for (auto& t : output_tensors_) { + if (t.name == name) { + return &t; + } + } + FDWARNING << "The output name [" << name << "] don't exist." << std::endl; + return nullptr; +} + +void Runtime::CreatePaddleBackend() { +#ifdef ENABLE_PADDLE_BACKEND + auto pd_option = PaddleBackendOption(); + pd_option.model_file = option.model_file; + pd_option.params_file = option.params_file; + pd_option.enable_mkldnn = option.pd_enable_mkldnn; + pd_option.enable_log_info = option.pd_enable_log_info; + pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size; + pd_option.use_gpu = (option.device == Device::GPU) ? true : false; + pd_option.use_ipu = (option.device == Device::IPU) ? 
true : false; + pd_option.gpu_id = option.device_id; + pd_option.delete_pass_names = option.pd_delete_pass_names; + pd_option.cpu_thread_num = option.cpu_thread_num; + pd_option.enable_pinned_memory = option.enable_pinned_memory; + pd_option.external_stream_ = option.external_stream_; + pd_option.model_from_memory_ = option.model_from_memory_; + if (pd_option.model_from_memory_) { + pd_option.model_buffer_ = option.model_buffer_; + pd_option.params_buffer_ = option.params_buffer_; + pd_option.model_buffer_size_ = option.model_buffer_size_; + pd_option.params_buffer_size_ = option.params_buffer_size_; + } +#ifdef ENABLE_TRT_BACKEND + if (pd_option.use_gpu && option.pd_enable_trt) { + pd_option.enable_trt = true; + pd_option.collect_shape = option.pd_collect_shape; + auto trt_option = TrtBackendOption(); + trt_option.gpu_id = option.device_id; + trt_option.enable_fp16 = option.trt_enable_fp16; + trt_option.max_batch_size = option.trt_max_batch_size; + trt_option.max_workspace_size = option.trt_max_workspace_size; + trt_option.max_shape = option.trt_max_shape; + trt_option.min_shape = option.trt_min_shape; + trt_option.opt_shape = option.trt_opt_shape; + trt_option.serialize_file = option.trt_serialize_file; + trt_option.enable_pinned_memory = option.enable_pinned_memory; + pd_option.trt_option = trt_option; + pd_option.trt_disabled_ops_ = option.trt_disabled_ops_; + } +#endif +#ifdef WITH_IPU + if (pd_option.use_ipu) { + auto ipu_option = IpuOption(); + ipu_option.ipu_device_num = option.ipu_device_num; + ipu_option.ipu_micro_batch_size = option.ipu_micro_batch_size; + ipu_option.ipu_enable_pipelining = option.ipu_enable_pipelining; + ipu_option.ipu_batches_per_step = option.ipu_batches_per_step; + ipu_option.ipu_enable_fp16 = option.ipu_enable_fp16; + ipu_option.ipu_replica_num = option.ipu_replica_num; + ipu_option.ipu_available_memory_proportion = + option.ipu_available_memory_proportion; + ipu_option.ipu_enable_half_partial = option.ipu_enable_half_partial; + pd_option.ipu_option = ipu_option; + } +#endif + FDASSERT(option.model_format == ModelFormat::PADDLE, + "PaddleBackend only support model format of ModelFormat::PADDLE."); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + if (pd_option.model_from_memory_) { + FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_, + option.params_buffer_, pd_option), + "Load model from Paddle failed while initliazing PaddleBackend."); + } else { + FDASSERT(casted_backend->InitFromPaddle(option.model_file, + option.params_file, pd_option), + "Load model from Paddle failed while initliazing PaddleBackend."); + } +#else + FDASSERT(false, + "PaddleBackend is not available, please compiled with " + "ENABLE_PADDLE_BACKEND=ON."); +#endif +} + +void Runtime::CreateOpenVINOBackend() { +#ifdef ENABLE_OPENVINO_BACKEND + auto ov_option = OpenVINOBackendOption(); + ov_option.cpu_thread_num = option.cpu_thread_num; + ov_option.device = option.openvino_device; + ov_option.shape_infos = option.ov_shape_infos; + ov_option.num_streams = option.ov_num_streams; + for (const auto& op : option.ov_cpu_operators) { + ov_option.cpu_operators.insert(op); + } + FDASSERT(option.model_format == ModelFormat::PADDLE || + option.model_format == ModelFormat::ONNX, + "OpenVINOBackend only support model format of ModelFormat::PADDLE / " + "ModelFormat::ONNX."); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + + if (option.model_format == ModelFormat::ONNX) { + 
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ov_option), + "Load model from ONNX failed while initliazing OrtBackend."); + } else { + FDASSERT(casted_backend->InitFromPaddle(option.model_file, + option.params_file, ov_option), + "Load model from Paddle failed while initliazing OrtBackend."); + } +#else + FDASSERT(false, + "OpenVINOBackend is not available, please compiled with " + "ENABLE_OPENVINO_BACKEND=ON."); +#endif +} + +void Runtime::CreateOrtBackend() { +#ifdef ENABLE_ORT_BACKEND + auto ort_option = OrtBackendOption(); + ort_option.graph_optimization_level = option.ort_graph_opt_level; + ort_option.intra_op_num_threads = option.cpu_thread_num; + ort_option.inter_op_num_threads = option.ort_inter_op_num_threads; + ort_option.execution_mode = option.ort_execution_mode; + ort_option.use_gpu = (option.device == Device::GPU) ? true : false; + ort_option.gpu_id = option.device_id; + ort_option.external_stream_ = option.external_stream_; + + FDASSERT(option.model_format == ModelFormat::PADDLE || + option.model_format == ModelFormat::ONNX, + "OrtBackend only support model format of ModelFormat::PADDLE / " + "ModelFormat::ONNX."); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + if (option.model_format == ModelFormat::ONNX) { + FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option), + "Load model from ONNX failed while initliazing OrtBackend."); + } else { + FDASSERT(casted_backend->InitFromPaddle(option.model_file, + option.params_file, ort_option), + "Load model from Paddle failed while initliazing OrtBackend."); + } +#else + FDASSERT(false, + "OrtBackend is not available, please compiled with " + "ENABLE_ORT_BACKEND=ON."); +#endif +} + +void Runtime::CreateTrtBackend() { +#ifdef ENABLE_TRT_BACKEND + auto trt_option = TrtBackendOption(); + trt_option.model_file = option.model_file; + trt_option.params_file = option.params_file; + trt_option.model_format = option.model_format; + trt_option.gpu_id = option.device_id; + trt_option.enable_fp16 = option.trt_enable_fp16; + trt_option.enable_int8 = option.trt_enable_int8; + trt_option.max_batch_size = option.trt_max_batch_size; + trt_option.max_workspace_size = option.trt_max_workspace_size; + trt_option.max_shape = option.trt_max_shape; + trt_option.min_shape = option.trt_min_shape; + trt_option.opt_shape = option.trt_opt_shape; + trt_option.serialize_file = option.trt_serialize_file; + trt_option.enable_pinned_memory = option.enable_pinned_memory; + trt_option.external_stream_ = option.external_stream_; + + FDASSERT(option.model_format == ModelFormat::PADDLE || + option.model_format == ModelFormat::ONNX, + "TrtBackend only support model format of ModelFormat::PADDLE / " + "ModelFormat::ONNX."); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + if (option.model_format == ModelFormat::ONNX) { + FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option), + "Load model from ONNX failed while initliazing TrtBackend."); + } else { + FDASSERT(casted_backend->InitFromPaddle(option.model_file, + option.params_file, trt_option), + "Load model from Paddle failed while initliazing TrtBackend."); + } +#else + FDASSERT(false, + "TrtBackend is not available, please compiled with " + "ENABLE_TRT_BACKEND=ON."); +#endif +} + +void Runtime::CreateLiteBackend() { +#ifdef ENABLE_LITE_BACKEND + auto lite_option = LiteBackendOption(); + lite_option.threads = option.cpu_thread_num; + lite_option.enable_int8 = option.lite_enable_int8; + 
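// ---------------------------------------------------------------------------
// [Editor's note: usage sketch, not part of this patch.] The CPU-side fields
// consumed by CreateOpenVINOBackend()/CreateOrtBackend() above are set through
// RuntimeOption; the values below are illustrative only.
fastdeploy::RuntimeOption opt;
opt.UseCpu();
opt.SetCpuThreadNum(8);
opt.UseOpenVINOBackend();
opt.SetOpenVINODevice("CPU");    // "AUTO" / "GPU" are also accepted by OpenVINO
opt.SetOpenVINOStreams(4);
// Or, with ONNX Runtime instead of OpenVINO:
// opt.UseOrtBackend();
// opt.SetOrtGraphOptLevel(2);   // ORT_ENABLE_EXTENDED
// ---------------------------------------------------------------------------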
lite_option.enable_fp16 = option.lite_enable_fp16; + lite_option.power_mode = static_cast(option.lite_power_mode); + lite_option.optimized_model_dir = option.lite_optimized_model_dir; + lite_option.nnadapter_subgraph_partition_config_path = + option.lite_nnadapter_subgraph_partition_config_path; + lite_option.nnadapter_subgraph_partition_config_buffer = + option.lite_nnadapter_subgraph_partition_config_buffer; + lite_option.nnadapter_device_names = option.lite_nnadapter_device_names; + lite_option.nnadapter_context_properties = + option.lite_nnadapter_context_properties; + lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir; + lite_option.nnadapter_dynamic_shape_info = + option.lite_nnadapter_dynamic_shape_info; + lite_option.nnadapter_mixed_precision_quantization_config_path = + option.lite_nnadapter_mixed_precision_quantization_config_path; + lite_option.enable_timvx = option.enable_timvx; + lite_option.enable_ascend = option.enable_ascend; + lite_option.enable_kunlunxin = option.enable_kunlunxin; + lite_option.device_id = option.device_id; + lite_option.kunlunxin_l3_workspace_size = option.kunlunxin_l3_workspace_size; + lite_option.kunlunxin_locked = option.kunlunxin_locked; + lite_option.kunlunxin_autotune = option.kunlunxin_autotune; + lite_option.kunlunxin_autotune_file = option.kunlunxin_autotune_file; + lite_option.kunlunxin_precision = option.kunlunxin_precision; + lite_option.kunlunxin_adaptive_seqlen = option.kunlunxin_adaptive_seqlen; + lite_option.kunlunxin_enable_multi_stream = + option.kunlunxin_enable_multi_stream; + + FDASSERT(option.model_format == ModelFormat::PADDLE, + "LiteBackend only support model format of ModelFormat::PADDLE"); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file, + lite_option), + "Load model from nb file failed while initializing LiteBackend."); +#else + FDASSERT(false, + "LiteBackend is not available, please compiled with " + "ENABLE_LITE_BACKEND=ON."); +#endif +} + +void Runtime::CreateRKNPU2Backend() { +#ifdef ENABLE_RKNPU2_BACKEND + auto rknpu2_option = RKNPU2BackendOption(); + rknpu2_option.cpu_name = option.rknpu2_cpu_name_; + rknpu2_option.core_mask = option.rknpu2_core_mask_; + FDASSERT(option.model_format == ModelFormat::RKNN, + "RKNPU2Backend only support model format of ModelFormat::RKNN"); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option), + "Load model from nb file failed while initializing LiteBackend."); +#else + FDASSERT(false, + "RKNPU2Backend is not available, please compiled with " + "ENABLE_RKNPU2_BACKEND=ON."); +#endif +} + +void Runtime::CreateSophgoNPUBackend() { +#ifdef ENABLE_SOPHGO_BACKEND + auto sophgo_option = SophgoBackendOption(); + FDASSERT(option.model_format == ModelFormat::SOPHGO, + "SophgoBackend only support model format of ModelFormat::SOPHGO"); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option), + "Load model from nb file failed while initializing LiteBackend."); +#else + FDASSERT(false, + "SophgoBackend is not available, please compiled with " + "ENABLE_SOPHGO_BACKEND=ON."); +#endif +} + +Runtime* Runtime::Clone(void* stream, int device_id) { + Runtime* runtime = new Runtime(); + if (option.backend != Backend::OPENVINO && + option.backend != 
Backend::PDINFER && option.backend != Backend::TRT) { + runtime->Init(option); + FDWARNING << "Only OpenVINO/Paddle Inference/TensorRT support \ + clone engine to reduce CPU/GPU memory usage now. For " + << option.backend + << ", FastDeploy will create a new engine which \ + will not share memory with the current runtime." + << std::endl; + return runtime; + } + FDINFO << "Runtime Clone with Backend:: " << option.backend << " in " + << option.device << "." << std::endl; + runtime->option = option; + runtime->backend_ = backend_->Clone(stream, device_id); + return runtime; +} + +} // namespace fastdeploy diff --git a/fastdeploy/runtime/runtime.h b/fastdeploy/runtime/runtime.h new file mode 100755 index 000000000..36a661463 --- /dev/null +++ b/fastdeploy/runtime/runtime.h @@ -0,0 +1,109 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file runtime.h + \brief A brief file description. + + More details + */ + +#pragma once +#include "fastdeploy/backends/backend.h" +#include "fastdeploy/core/fd_tensor.h" +#include "fastdeploy/runtime/runtime_option.h" +#include "fastdeploy/utils/perf.h" + +/** \brief All C++ FastDeploy APIs are defined inside this namespace +* +*/ +namespace fastdeploy { + +/*! @brief Runtime object used to inference the loaded model on different devices + */ +struct FASTDEPLOY_DECL Runtime { + public: + /// Intialize a Runtime object with RuntimeOption + bool Init(const RuntimeOption& _option); + + /** \brief Inference the model by the input data, and write to the output + * + * \param[in] input_tensors Notice the FDTensor::name should keep same with the model's input + * \param[in] output_tensors Inference results + * \return true if the inference successed, otherwise false + */ + bool Infer(std::vector& input_tensors, + std::vector* output_tensors); + + /** \brief No params inference the model. + * + * the input and output data need to pass through the BindInputTensor and GetOutputTensor interfaces. 
+ */ + bool Infer(); + + /** \brief Compile TorchScript Module, only for Poros backend + * + * \param[in] prewarm_tensors Prewarm datas for compile + * \param[in] _option Runtime option + * \return true if compile successed, otherwise false + */ + bool Compile(std::vector>& prewarm_tensors, + const RuntimeOption& _option); + + /** \brief Get number of inputs + */ + int NumInputs() { return backend_->NumInputs(); } + /** \brief Get number of outputs + */ + int NumOutputs() { return backend_->NumOutputs(); } + /** \brief Get input information by index + */ + TensorInfo GetInputInfo(int index); + /** \brief Get output information by index + */ + TensorInfo GetOutputInfo(int index); + /** \brief Get all the input information + */ + std::vector GetInputInfos(); + /** \brief Get all the output information + */ + std::vector GetOutputInfos(); + /** \brief Bind FDTensor by name, no copy and share input memory + */ + void BindInputTensor(const std::string& name, FDTensor& input); + /** \brief Get output FDTensor by name, no copy and share backend output memory + */ + FDTensor* GetOutputTensor(const std::string& name); + + /** \brief Clone new Runtime when multiple instances of the same model are created + * + * \param[in] stream CUDA Stream, defualt param is nullptr + * \return new Runtime* by this clone + */ + Runtime* Clone(void* stream = nullptr, int device_id = -1); + + RuntimeOption option; + + private: + void CreateOrtBackend(); + void CreatePaddleBackend(); + void CreateTrtBackend(); + void CreateOpenVINOBackend(); + void CreateLiteBackend(); + void CreateRKNPU2Backend(); + void CreateSophgoNPUBackend(); + std::unique_ptr backend_; + std::vector input_tensors_; + std::vector output_tensors_; +}; +} // namespace fastdeploy diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc new file mode 100644 index 000000000..8e2ab6af8 --- /dev/null +++ b/fastdeploy/runtime/runtime_option.cc @@ -0,0 +1,515 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
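// ---------------------------------------------------------------------------
// [Editor's note: usage sketch, not part of this patch.] The zero-copy path
// documented above (BindInputTensor / Infer() / GetOutputTensor) looks like
// this for an already-initialized `runtime`; the tensor names "x"/"y" are
// placeholders for the model's real input/output names.
std::vector<float> buffer(1 * 3 * 224 * 224, 0.0f);   // user-owned memory
fastdeploy::FDTensor input("x");
input.SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
                      buffer.data(), fastdeploy::Device::CPU);
runtime.BindInputTensor("x", input);    // shares the buffer, no copy
runtime.Infer();                        // no-parameter overload
fastdeploy::FDTensor* y = runtime.GetOutputTensor("y");  // backend-owned memory
// Clone() shares the engine (OPENVINO/PDINFER/TRT only) to cut memory usage:
fastdeploy::Runtime* runtime2 = runtime.Clone();
// ---------------------------------------------------------------------------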
+ +#include "fastdeploy/runtime/runtime.h" +#include "fastdeploy/utils/unique_ptr.h" +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { + +std::vector GetAvailableBackends() { + std::vector backends; +#ifdef ENABLE_ORT_BACKEND + backends.push_back(Backend::ORT); +#endif +#ifdef ENABLE_TRT_BACKEND + backends.push_back(Backend::TRT); +#endif +#ifdef ENABLE_PADDLE_BACKEND + backends.push_back(Backend::PDINFER); +#endif +#ifdef ENABLE_POROS_BACKEND + backends.push_back(Backend::POROS); +#endif +#ifdef ENABLE_OPENVINO_BACKEND + backends.push_back(Backend::OPENVINO); +#endif +#ifdef ENABLE_LITE_BACKEND + backends.push_back(Backend::LITE); +#endif +#ifdef ENABLE_RKNPU2_BACKEND + backends.push_back(Backend::RKNPU2); +#endif +#ifdef ENABLE_SOPHGO_BACKEND + backends.push_back(Backend::SOPHGOTPU); +#endif + return backends; +} + +bool IsBackendAvailable(const Backend& backend) { + std::vector backends = GetAvailableBackends(); + for (size_t i = 0; i < backends.size(); ++i) { + if (backend == backends[i]) { + return true; + } + } + return false; +} + +bool CheckModelFormat(const std::string& model_file, + const ModelFormat& model_format) { + if (model_format == ModelFormat::PADDLE) { + if (model_file.size() < 8 || + model_file.substr(model_file.size() - 8, 8) != ".pdmodel") { + FDERROR << "With model format of ModelFormat::PADDLE, the model file " + "should ends with `.pdmodel`, but now it's " + << model_file << std::endl; + return false; + } + } else if (model_format == ModelFormat::ONNX) { + if (model_file.size() < 5 || + model_file.substr(model_file.size() - 5, 5) != ".onnx") { + FDERROR << "With model format of ModelFormat::ONNX, the model file " + "should ends with `.onnx`, but now it's " + << model_file << std::endl; + return false; + } + } else if (model_format == ModelFormat::RKNN) { + if (model_file.size() < 5 || + model_file.substr(model_file.size() - 5, 5) != ".rknn") { + FDERROR << "With model format of ModelFormat::RKNN, the model file " + "should ends with `.rknn`, but now it's " + << model_file << std::endl; + return false; + } + } else if (model_format == ModelFormat::TORCHSCRIPT) { + if (model_file.size() < 3 || + model_file.substr(model_file.size() - 3, 3) != ".pt") { + FDERROR + << "With model format of ModelFormat::TORCHSCRIPT, the model file " + "should ends with `.pt`, but now it's " + << model_file << std::endl; + return false; + } + } else if (model_format == ModelFormat::SOPHGO) { + if (model_file.size() < 7 || + model_file.substr(model_file.size() - 7, 7) != ".bmodel") { + FDERROR << "With model format of ModelFormat::SOPHGO, the model file " + "should ends with `.bmodel`, but now it's " + << model_file << std::endl; + return false; + } + } else { + FDERROR + << "Only support model format with frontend ModelFormat::PADDLE / " + "ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT." + << std::endl; + return false; + } + return true; +} + +ModelFormat GuessModelFormat(const std::string& model_file) { + if (model_file.size() > 8 && + model_file.substr(model_file.size() - 8, 8) == ".pdmodel") { + FDINFO << "Model Format: PaddlePaddle." << std::endl; + return ModelFormat::PADDLE; + } else if (model_file.size() > 5 && + model_file.substr(model_file.size() - 5, 5) == ".onnx") { + FDINFO << "Model Format: ONNX." << std::endl; + return ModelFormat::ONNX; + } else if (model_file.size() > 3 && + model_file.substr(model_file.size() - 3, 3) == ".pt") { + FDINFO << "Model Format: Torchscript." 
<< std::endl;
+    return ModelFormat::TORCHSCRIPT;
+  } else if (model_file.size() > 5 &&
+             model_file.substr(model_file.size() - 5, 5) == ".rknn") {
+    FDINFO << "Model Format: RKNN." << std::endl;
+    return ModelFormat::RKNN;
+  } else if (model_file.size() > 7 &&
+             model_file.substr(model_file.size() - 7, 7) == ".bmodel") {
+    FDINFO << "Model Format: SOPHGO." << std::endl;
+    return ModelFormat::SOPHGO;
+  }
+
+  FDERROR << "Cannot guess which model format you are using, please set "
+             "RuntimeOption::model_format manually."
+          << std::endl;
+  return ModelFormat::PADDLE;
+}
+
+void RuntimeOption::SetModelPath(const std::string& model_path,
+                                 const std::string& params_path,
+                                 const ModelFormat& format) {
+  if (format == ModelFormat::PADDLE) {
+    model_file = model_path;
+    params_file = params_path;
+    model_format = ModelFormat::PADDLE;
+  } else if (format == ModelFormat::ONNX) {
+    model_file = model_path;
+    model_format = ModelFormat::ONNX;
+  } else if (format == ModelFormat::TORCHSCRIPT) {
+    model_file = model_path;
+    model_format = ModelFormat::TORCHSCRIPT;
+  } else {
+    FDASSERT(false,
+             "The model format only can be "
+             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
+  }
+}
+
+void RuntimeOption::SetModelBuffer(const char* model_buffer,
+                                   size_t model_buffer_size,
+                                   const char* params_buffer,
+                                   size_t params_buffer_size,
+                                   const ModelFormat& format) {
+  model_buffer_size_ = model_buffer_size;
+  params_buffer_size_ = params_buffer_size;
+  model_from_memory_ = true;
+  if (format == ModelFormat::PADDLE) {
+    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
+    params_buffer_ =
+        std::string(params_buffer, params_buffer + params_buffer_size);
+    model_format = ModelFormat::PADDLE;
+  } else if (format == ModelFormat::ONNX) {
+    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
+    model_format = ModelFormat::ONNX;
+  } else if (format == ModelFormat::TORCHSCRIPT) {
+    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
+    model_format = ModelFormat::TORCHSCRIPT;
+  } else {
+    FDASSERT(false,
+             "The model format only can be "
+             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
+  }
+}
+
+void RuntimeOption::UseGpu(int gpu_id) {
+#ifdef WITH_GPU
+  device = Device::GPU;
+  device_id = gpu_id;
+#else
+  FDWARNING << "The FastDeploy didn't compile with GPU, will force to use CPU."
+            << std::endl;
+  device = Device::CPU;
+#endif
+}
+
+void RuntimeOption::UseCpu() { device = Device::CPU; }
+
+void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
+                              fastdeploy::rknpu2::CoreMask rknpu2_core) {
+  rknpu2_cpu_name_ = rknpu2_name;
+  rknpu2_core_mask_ = rknpu2_core;
+  device = Device::RKNPU;
+}
+
+void RuntimeOption::UseTimVX() {
+  enable_timvx = true;
+  device = Device::TIMVX;
+}
+
+void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
+                                 bool locked, bool autotune,
+                                 const std::string& autotune_file,
+                                 const std::string& precision,
+                                 bool adaptive_seqlen,
+                                 bool enable_multi_stream) {
+  enable_kunlunxin = true;
+  device_id = kunlunxin_id;
+  kunlunxin_l3_workspace_size = l3_workspace_size;
+  kunlunxin_locked = locked;
+  kunlunxin_autotune = autotune;
+  kunlunxin_autotune_file = autotune_file;
+  kunlunxin_precision = precision;
+  kunlunxin_adaptive_seqlen = adaptive_seqlen;
+  kunlunxin_enable_multi_stream = enable_multi_stream;
+  device = Device::KUNLUNXIN;
+}
+
+void RuntimeOption::UseAscend() {
+  enable_ascend = true;
+  device = Device::ASCEND;
+}
+
+void RuntimeOption::UseSophgo() {
+  device = Device::SOPHGOTPUD;
+  UseSophgoBackend();
+}
+
+void RuntimeOption::SetExternalStream(void* external_stream) {
+  external_stream_ = external_stream;
+}
+
+void RuntimeOption::SetCpuThreadNum(int thread_num) {
+  FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
+  cpu_thread_num = thread_num;
+}
+
+void RuntimeOption::SetOrtGraphOptLevel(int level) {
+  std::vector<int> supported_level{-1, 0, 1, 2};
+  auto valid_level = std::find(supported_level.begin(), supported_level.end(),
+                               level) != supported_level.end();
+  FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
+  ort_graph_opt_level = level;
+}
+
+// use paddle inference backend
+void RuntimeOption::UsePaddleBackend() {
+#ifdef ENABLE_PADDLE_BACKEND
+  backend = Backend::PDINFER;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with Paddle Inference.");
+#endif
+}
+
+// use onnxruntime backend
+void RuntimeOption::UseOrtBackend() {
+#ifdef ENABLE_ORT_BACKEND
+  backend = Backend::ORT;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with OrtBackend.");
+#endif
+}
+
+// use sophgoruntime backend
+void RuntimeOption::UseSophgoBackend() {
+#ifdef ENABLE_SOPHGO_BACKEND
+  backend = Backend::SOPHGOTPU;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with SophgoBackend.");
+#endif
+}
+
+// use poros backend
+void RuntimeOption::UsePorosBackend() {
+#ifdef ENABLE_POROS_BACKEND
+  backend = Backend::POROS;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with PorosBackend.");
+#endif
+}
+
+void RuntimeOption::UseTrtBackend() {
+#ifdef ENABLE_TRT_BACKEND
+  backend = Backend::TRT;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with TrtBackend.");
+#endif
+}
+
+void RuntimeOption::UseOpenVINOBackend() {
+#ifdef ENABLE_OPENVINO_BACKEND
+  backend = Backend::OPENVINO;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with OpenVINO.");
+#endif
+}
+
+void RuntimeOption::UseLiteBackend() {
+#ifdef ENABLE_LITE_BACKEND
+  backend = Backend::LITE;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with Paddle Lite.");
+#endif
+}
+
+void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) {
+  pd_enable_mkldnn = pd_mkldnn;
+}
+
+void RuntimeOption::DeletePaddleBackendPass(const std::string& pass_name) {
+  pd_delete_pass_names.push_back(pass_name);
+}
+void RuntimeOption::EnablePaddleLogInfo() { pd_enable_log_info = true; }
+
+void RuntimeOption::DisablePaddleLogInfo() { pd_enable_log_info = false; }
+
+void RuntimeOption::EnablePaddleToTrt() {
+  FDASSERT(backend == Backend::TRT,
+           "Should call UseTrtBackend() before call EnablePaddleToTrt().");
+#ifdef ENABLE_PADDLE_BACKEND
+  FDINFO << "While using TrtBackend with EnablePaddleToTrt, FastDeploy will "
+            "change to use Paddle Inference Backend."
+         << std::endl;
+  backend = Backend::PDINFER;
+  pd_enable_trt = true;
+#else
+  FDASSERT(false,
+           "While using TrtBackend with EnablePaddleToTrt, require the "
+           "FastDeploy is compiled with Paddle Inference Backend, "
+           "please rebuild your FastDeploy.");
+#endif
+}
+
+void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) {
+  FDASSERT(size > 0, "Parameter size must be greater than 0.");
+  pd_mkldnn_cache_size = size;
+}
+
+void RuntimeOption::SetOpenVINODevice(const std::string& name) {
+  openvino_device = name;
+}
+
+void RuntimeOption::EnableLiteFP16() { lite_enable_fp16 = true; }
+
+void RuntimeOption::DisableLiteFP16() { lite_enable_fp16 = false; }
+void RuntimeOption::EnableLiteInt8() { lite_enable_int8 = true; }
+
+void RuntimeOption::DisableLiteInt8() { lite_enable_int8 = false; }
+void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
+  lite_power_mode = mode;
+}
+
+void RuntimeOption::SetLiteOptimizedModelDir(
+    const std::string& optimized_model_dir) {
+  lite_optimized_model_dir = optimized_model_dir;
+}
+
+void RuntimeOption::SetLiteSubgraphPartitionPath(
+    const std::string& nnadapter_subgraph_partition_config_path) {
+  lite_nnadapter_subgraph_partition_config_path =
+      nnadapter_subgraph_partition_config_path;
+}
+
+void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer(
+    const std::string& nnadapter_subgraph_partition_config_buffer) {
+  lite_nnadapter_subgraph_partition_config_buffer =
+      nnadapter_subgraph_partition_config_buffer;
+}
+
+void RuntimeOption::SetLiteDeviceNames(
+    const std::vector<std::string>& nnadapter_device_names) {
+  lite_nnadapter_device_names = nnadapter_device_names;
+}
+
+void RuntimeOption::SetLiteContextProperties(
+    const std::string& nnadapter_context_properties) {
+  lite_nnadapter_context_properties = nnadapter_context_properties;
+}
+
+void RuntimeOption::SetLiteModelCacheDir(
+    const std::string& nnadapter_model_cache_dir) {
+  lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir;
+}
+
+void RuntimeOption::SetLiteDynamicShapeInfo(
+    const std::map<std::string, std::vector<std::vector<int64_t>>>&
+        nnadapter_dynamic_shape_info) {
+  lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info;
+}
+
+void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath(
+    const std::string& nnadapter_mixed_precision_quantization_config_path) {
+  lite_nnadapter_mixed_precision_quantization_config_path =
+      nnadapter_mixed_precision_quantization_config_path;
+}
+
+void RuntimeOption::SetTrtInputShape(const std::string& input_name,
+                                     const std::vector<int32_t>& min_shape,
+                                     const std::vector<int32_t>& opt_shape,
+                                     const std::vector<int32_t>& max_shape) {
+  trt_min_shape[input_name].clear();
+  trt_max_shape[input_name].clear();
+  trt_opt_shape[input_name].clear();
+  trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end());
+  if (opt_shape.size() == 0) {
+    trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end());
+  } else {
+    trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end());
+  }
+  if (max_shape.size() == 0) {
+    trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end());
+  } else {
+    trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end());
+  }
+}
+
+void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) {
+  trt_max_workspace_size = max_workspace_size;
+}
+void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size) {
+  trt_max_batch_size = max_batch_size;
+}
+
+void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }
+
+void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; }
+
+void RuntimeOption::EnablePinnedMemory() { enable_pinned_memory = true; }
+
+void RuntimeOption::DisablePinnedMemory() { enable_pinned_memory = false; }
+
+void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
+  trt_serialize_file = cache_file_path;
+}
+
+void RuntimeOption::SetOpenVINOStreams(int num_streams) {
+  ov_num_streams = num_streams;
+}
+
+bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
+                      const RuntimeOption& _option) {
+#ifdef ENABLE_POROS_BACKEND
+  option = _option;
+  auto poros_option = PorosBackendOption();
+  poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
+  poros_option.gpu_id = option.device_id;
+  poros_option.long_to_int = option.long_to_int;
+  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
+  poros_option.unconst_ops_thres = option.unconst_ops_thres;
+  poros_option.poros_file = option.poros_file;
+  poros_option.is_dynamic = option.is_dynamic;
+  poros_option.enable_fp16 = option.trt_enable_fp16;
+  poros_option.max_batch_size = option.trt_max_batch_size;
+  poros_option.max_workspace_size = option.trt_max_workspace_size;
+  FDASSERT(
+      option.model_format == ModelFormat::TORCHSCRIPT,
+      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
+  backend_ = utils::make_unique<PorosBackend>();
+  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
+  FDASSERT(
+      casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
+      "Load model from Torchscript failed while initializing PorosBackend.");
+#else
+  FDASSERT(false,
+           "PorosBackend is not available, please compiled with "
+           "ENABLE_POROS_BACKEND=ON.");
+#endif
+  return true;
+}
+
+void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; }
+
+void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; }
+
+void RuntimeOption::DisablePaddleTrtOPs(const std::vector<std::string>& ops) {
+  trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
+}
+
+void RuntimeOption::UseIpu(int device_num, int micro_batch_size,
+                           bool enable_pipelining, int batches_per_step) {
+#ifdef WITH_IPU
+  device = Device::IPU;
+  ipu_device_num = device_num;
+  ipu_micro_batch_size = micro_batch_size;
+  ipu_enable_pipelining = enable_pipelining;
+  ipu_batches_per_step = batches_per_step;
+#else
+  FDWARNING << "The FastDeploy didn't compile with IPU, will force to use CPU."
+            << std::endl;
+  device = Device::CPU;
+#endif
+}
+
+void RuntimeOption::SetIpuConfig(bool enable_fp16, int replica_num,
+                                 float available_memory_proportion,
+                                 bool enable_half_partial) {
+  ipu_enable_fp16 = enable_fp16;
+  ipu_replica_num = replica_num;
+  ipu_available_memory_proportion = available_memory_proportion;
+  ipu_enable_half_partial = enable_half_partial;
+}
+
+} // namespace fastdeploy
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
new file mode 100644
index 000000000..6b1f1caa7
--- /dev/null
+++ b/fastdeploy/runtime/runtime_option.h
@@ -0,0 +1,482 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*! \file runtime_option.h
+    \brief Options used to configure how FastDeploy Runtime loads and runs a model.
+ */
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+#include "fastdeploy/runtime/enum_variables.h"
+#include "fastdeploy/backends/lite/option.h"
+#include "fastdeploy/backends/openvino/option.h"
+#include "fastdeploy/backends/ort/option.h"
+#include "fastdeploy/backends/paddle/option.h"
+#include "fastdeploy/backends/poros/option.h"
+#include "fastdeploy/backends/rknpu2/option.h"
+#include "fastdeploy/backends/sophgo/option.h"
+#include "fastdeploy/backends/tensorrt/option.h"
+
+namespace fastdeploy {
+
+/**
+ * @brief Get all the available inference backends in FastDeploy
+ */
+FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
+
+/**
+ * @brief Check if the inference backend is available
+ */
+FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
+
+bool CheckModelFormat(const std::string& model_file,
+                      const ModelFormat& model_format);
+ModelFormat GuessModelFormat(const std::string& model_file);
+
+/*! @brief Option object used when creating a new Runtime object
+ */
+struct FASTDEPLOY_DECL RuntimeOption {
+  /** \brief Set path of model file and parameter file
+   *
+   * \param[in] model_path Path of model file, e.g. ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model
+   * \param[in] params_path Path of parameter file, this is only used when the model format is Paddle, e.g. Resnet50/model.pdiparams
+   * \param[in] format Format of the loaded model
+   */
+  void SetModelPath(const std::string& model_path,
+                    const std::string& params_path = "",
+                    const ModelFormat& format = ModelFormat::PADDLE);
+
+  /** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
+   *
+   * \param[in] model_buffer The memory buffer of model
+   * \param[in] model_buffer_size The size of the model data
+   * \param[in] params_buffer The memory buffer of the combined parameters file
+   * \param[in] params_buffer_size The size of the combined parameters data
+   * \param[in] format Format of the loaded model
+   */
+  void SetModelBuffer(const char* model_buffer, size_t model_buffer_size,
+                      const char* params_buffer, size_t params_buffer_size,
+                      const ModelFormat& format = ModelFormat::PADDLE);
+
+  /// Use CPU to run inference, the runtime will run on CPU by default
+  void UseCpu();
+
+  /// Use Nvidia GPU to run inference
+  void UseGpu(int gpu_id = 0);
+
+  /// Use RKNPU2 to run inference
+  void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name =
+                     fastdeploy::rknpu2::CpuName::RK3588,
+                 fastdeploy::rknpu2::CoreMask rknpu2_core =
+                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
+
+  /// Use TimVX to run inference
+  void UseTimVX();
+
+  /// Use Huawei Ascend to run inference
+  void UseAscend();
+
+  ///
+  /// \brief Turn on KunlunXin XPU.
+  ///
+  /// \param kunlunxin_id the KunlunXin XPU card to use (default is 0).
+  /// \param l3_workspace_size The size of the video memory allocated by the l3
+  ///     cache, the maximum is 16M.
+  /// \param locked Whether the allocated L3 cache can be locked. If false,
+  ///     it means that the L3 cache is not locked, and the allocated L3
+  ///     cache can be shared by multiple models, and multiple models
+  ///     sharing the L3 cache will be executed sequentially on the card.
+  /// \param autotune Whether to autotune the conv operator in the model. If
+  ///     true, when the conv operator of a certain dimension is executed
+  ///     for the first time, it will automatically search for a better
+  ///     algorithm to improve the performance of subsequent conv operators
+  ///     of the same dimension.
+  /// \param autotune_file Specify the path of the autotune file. If
+  ///     autotune_file is specified, the algorithm specified in the
+  ///     file will be used and autotune will not be performed again.
+  /// \param precision Calculation accuracy of multi_encoder
+  /// \param adaptive_seqlen Whether the input of multi_encoder is variable length
+  /// \param enable_multi_stream Whether to enable the multi stream of
+  ///     KunlunXin XPU.
+  ///
+  void UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00,
+                    bool locked = false, bool autotune = true,
+                    const std::string& autotune_file = "",
+                    const std::string& precision = "int16",
+                    bool adaptive_seqlen = false,
+                    bool enable_multi_stream = false);
+
+  /// Use Sophgo to run inference
+  void UseSophgo();
+
+  void SetExternalStream(void* external_stream);
+
+  /**
+   * @brief Set the number of CPU threads used while running inference on CPU, by default it will be decided by the different backends
+   */
+  void SetCpuThreadNum(int thread_num);
+
+  /// Set ORT graph opt level, the default is decided by ONNX Runtime itself
+  void SetOrtGraphOptLevel(int level = -1);
+
+  /// Set Paddle Inference as inference backend, supports CPU/GPU
+  void UsePaddleBackend();
+
+  /// Wrapper function of UsePaddleBackend()
+  void UsePaddleInferBackend() { return UsePaddleBackend(); }
+
+  /// Set ONNX Runtime as inference backend, supports CPU/GPU
+  void UseOrtBackend();
+
+  /// Set SOPHGO Runtime as inference backend, supports CPU/GPU
+  void UseSophgoBackend();
+
+  /// Set TensorRT as inference backend, only supports GPU
+  void UseTrtBackend();
+
+  /// Set Poros backend as inference backend, supports CPU/GPU
+  void UsePorosBackend();
+
+  /// Set OpenVINO as inference backend, only supports CPU
+  void UseOpenVINOBackend();
+
+  /// Set Paddle Lite as inference backend, only supports Arm CPU
+  void UseLiteBackend();
+
+  /// Wrapper function of UseLiteBackend()
+  void UsePaddleLiteBackend() { return UseLiteBackend(); }
+
+  /// Set MKLDNN switch while using Paddle Inference as inference backend
+  void SetPaddleMKLDNN(bool pd_mkldnn = true);
+
+  /**
+   * @brief If TensorRT backend is used, EnablePaddleToTrt will change to use Paddle Inference backend, and use its integrated TensorRT instead.
+   */
+  void EnablePaddleToTrt();
+
+  /**
+   * @brief Delete pass by name while using Paddle Inference as inference backend, this can be called multiple times to delete a set of passes
+   */
+  void DeletePaddleBackendPass(const std::string& delete_pass_name);
+
+  /**
+   * @brief Enable printing debug information while using Paddle Inference as inference backend, the backend disables the debug information by default
+   */
+  void EnablePaddleLogInfo();
+
+  /**
+   * @brief Disable printing debug information while using Paddle Inference as inference backend
+   */
+  void DisablePaddleLogInfo();
+
+  /**
+   * @brief Set shape cache size while using Paddle Inference with mkldnn, by default it will cache all the different shapes
+   */
+  void SetPaddleMKLDNNCacheSize(int size);
+
+  /**
+   * @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'....
+   */
+  void SetOpenVINODevice(const std::string& name = "CPU");
+
+  /**
+   * @brief Set shape info for OpenVINO
+   */
+  void SetOpenVINOShapeInfo(
+      const std::map<std::string, std::vector<int64_t>>& shape_info) {
+    ov_shape_infos = shape_info;
+  }
+
+  /**
+   * @brief While using the OpenVINO backend with an Intel GPU, use this interface to specify the operators that run on CPU
+   */
+  void SetOpenVINOCpuOperators(const std::vector<std::string>& operators) {
+    ov_cpu_operators = operators;
+  }
+
+  /**
+   * @brief Set optimized model dir for Paddle Lite backend.
+   */
+  void SetLiteOptimizedModelDir(const std::string& optimized_model_dir);
+
+  /**
+   * @brief Set subgraph partition config path for Paddle Lite backend.
+   */
+  void SetLiteSubgraphPartitionPath(
+      const std::string& nnadapter_subgraph_partition_config_path);
+
+  /**
+   * @brief Set subgraph partition config buffer for Paddle Lite backend.
+   */
+  void SetLiteSubgraphPartitionConfigBuffer(
+      const std::string& nnadapter_subgraph_partition_config_buffer);
+
+  /**
+   * @brief Set device names for Paddle Lite backend.
+   */
+  void
+  SetLiteDeviceNames(const std::vector<std::string>& nnadapter_device_names);
+
+  /**
+   * @brief Set context properties for Paddle Lite backend.
+   */
+  void
+  SetLiteContextProperties(const std::string& nnadapter_context_properties);
+
+  /**
+   * @brief Set model cache dir for Paddle Lite backend.
+   */
+  void SetLiteModelCacheDir(const std::string& nnadapter_model_cache_dir);
+
+  /**
+   * @brief Set dynamic shape info for Paddle Lite backend.
+   */
+  void SetLiteDynamicShapeInfo(
+      const std::map<std::string, std::vector<std::vector<int64_t>>>&
+          nnadapter_dynamic_shape_info);
+
+  /**
+   * @brief Set mixed precision quantization config path for Paddle Lite backend.
+   */
+  void SetLiteMixedPrecisionQuantizationConfigPath(
+      const std::string& nnadapter_mixed_precision_quantization_config_path);
+
+  /**
+   * @brief Enable half precision while using the Paddle Lite backend
+   */
+  void EnableLiteFP16();
+
+  /**
+   * @brief Disable half precision, change to full precision (float32)
+   */
+  void DisableLiteFP16();
+
+  /**
+   * @brief Enable int8 precision while using the Paddle Lite backend
+   */
+  void EnableLiteInt8();
+
+  /**
+   * @brief Disable int8 precision, change to full precision (float32)
+   */
+  void DisableLiteInt8();
+
+  /**
+   * @brief Set power mode while using Paddle Lite as inference backend, mode(0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND, 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW, refer [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details)
+   */
+  void SetLitePowerMode(LitePowerMode mode);
+
+  /** \brief Set shape range of input tensor for a model that contains dynamic input shapes while using TensorRT backend
+   *
+   * \param[in] input_name The name of the input of the model which has dynamic shape
+   * \param[in] min_shape The minimal shape for the input tensor
+   * \param[in] opt_shape The optimized shape for the input tensor, just set the most common shape, if set as the default value, it will keep the same as min_shape
+   * \param[in] max_shape The maximum shape for the input tensor, if set as the default value, it will keep the same as min_shape
+   */
+  void SetTrtInputShape(
+      const std::string& input_name, const std::vector<int32_t>& min_shape,
+      const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
+      const std::vector<int32_t>& max_shape = std::vector<int32_t>());
+
+  /// Set max_workspace_size for TensorRT, default 1<<30
+  void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
+
+  /// Set max_batch_size for TensorRT, default 32
+  void SetTrtMaxBatchSize(size_t max_batch_size);
+
+  /**
+   * @brief Enable FP16 inference while using TensorRT backend. Notice: not all GPU devices support FP16; on devices that don't support FP16, FastDeploy will fall back to FP32 automatically
+   */
+  void EnableTrtFP16();
+
+  /// Disable FP16 inference while using TensorRT backend
+  void DisableTrtFP16();
+
+  /**
+   * @brief Set cache file path while using TensorRT backend. Loading a Paddle/ONNX model and initializing TensorRT can take a long time; with this interface the TensorRT engine will be saved to `cache_file_path` and loaded directly the next time the code is executed
+   */
+  void SetTrtCacheFile(const std::string& cache_file_path);
+
+  /**
+   * @brief Enable pinned memory. Pinned memory can be utilized to speed up the data transfer between CPU and GPU. Currently it's only supported in the TRT backend and the Paddle Inference backend.
+   */
+  void EnablePinnedMemory();
+
+  /**
+   * @brief Disable pinned memory
+   */
+  void DisablePinnedMemory();
+
+  /**
+   * @brief Enable shape collecting in the Paddle-TRT backend
+   */
+  void EnablePaddleTrtCollectShape();
+
+  /**
+   * @brief Disable shape collecting in the Paddle-TRT backend
+   */
+  void DisablePaddleTrtCollectShape();
+
+  /**
+   * @brief Prevent the given ops from running in the Paddle-TRT backend
+   */
+  void DisablePaddleTrtOPs(const std::vector<std::string>& ops);
+
+  /**
+   * @brief Set the number of streams used by the OpenVINO backend
+   */
+  void SetOpenVINOStreams(int num_streams);
+
+  /** \brief Use Graphcore IPU to run inference.
+   *
+   * \param[in] device_num the number of IPUs.
+   * \param[in] micro_batch_size the batch size in the graph, only works when the graph has no batch shape info.
+   * \param[in] enable_pipelining enable pipelining.
+   * \param[in] batches_per_step the number of batches per run in pipelining.
+   */
+  void UseIpu(int device_num = 1, int micro_batch_size = 1,
+              bool enable_pipelining = false, int batches_per_step = 1);
+
+  /** \brief Set IPU config.
+   *
+   * \param[in] enable_fp16 enable fp16.
+   * \param[in] replica_num the number of graph replication.
+   * \param[in] available_memory_proportion the available memory proportion for matmul/conv.
+   * \param[in] enable_half_partial enable fp16 partial for matmul, only works with fp16.
+   */
+  void SetIpuConfig(bool enable_fp16 = false, int replica_num = 1,
+                    float available_memory_proportion = 1.0,
+                    bool enable_half_partial = false);
+
+  Backend backend = Backend::UNKNOWN;
+  // for cpu inference and preprocess
+  // the default value will let each backend choose its own default value
+  int cpu_thread_num = -1;
+  int device_id = 0;
+
+  Device device = Device::CPU;
+
+  void* external_stream_ = nullptr;
+
+  bool enable_pinned_memory = false;
+
+  // ======Only for ORT Backend========
+  // -1 means use default value by ort
+  // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
+  // ORT_ENABLE_ALL
+  int ort_graph_opt_level = -1;
+  int ort_inter_op_num_threads = -1;
+  // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
+  int ort_execution_mode = -1;
+
+  // ======Only for Paddle Backend=====
+  bool pd_enable_mkldnn = true;
+  bool pd_enable_log_info = false;
+  bool pd_enable_trt = false;
+  bool pd_collect_shape = false;
+  int pd_mkldnn_cache_size = 1;
+  std::vector<std::string> pd_delete_pass_names;
+
+  // ======Only for Paddle IPU Backend =======
+  int ipu_device_num = 1;
+  int ipu_micro_batch_size = 1;
+  bool ipu_enable_pipelining = false;
+  int ipu_batches_per_step = 1;
+  bool ipu_enable_fp16 = false;
+  int ipu_replica_num = 1;
+  float ipu_available_memory_proportion = 1.0;
+  bool ipu_enable_half_partial = false;
+
+  // ======Only for Paddle Lite Backend=====
+  // 0: LITE_POWER_HIGH 1: LITE_POWER_LOW 2: LITE_POWER_FULL
+  // 3: LITE_POWER_NO_BIND 4: LITE_POWER_RAND_HIGH
+  // 5: LITE_POWER_RAND_LOW
+  LitePowerMode lite_power_mode = LitePowerMode::LITE_POWER_NO_BIND;
+  // enable int8 or not
+  bool lite_enable_int8 = false;
+  // enable fp16 or not
+  bool lite_enable_fp16 = false;
+  // optimized model dir for CxxConfig
+  std::string lite_optimized_model_dir = "";
+  std::string lite_nnadapter_subgraph_partition_config_path = "";
+  // and other nnadapter settings for CxxConfig
+  std::string lite_nnadapter_subgraph_partition_config_buffer = "";
+  std::string lite_nnadapter_context_properties = "";
+  std::string lite_nnadapter_model_cache_dir = "";
+  std::string lite_nnadapter_mixed_precision_quantization_config_path = "";
+  std::map<std::string, std::vector<std::vector<int64_t>>>
+      lite_nnadapter_dynamic_shape_info = {{"", {{0}}}};
+  std::vector<std::string> lite_nnadapter_device_names = {};
+
+  bool enable_timvx = false;
+  bool enable_ascend = false;
+  bool enable_kunlunxin = false;
+
+  // ======Only for Trt Backend=======
+  std::map<std::string, std::vector<int32_t>> trt_max_shape;
+  std::map<std::string, std::vector<int32_t>> trt_min_shape;
+  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
+  std::string trt_serialize_file = "";
+  bool trt_enable_fp16 = false;
+  bool trt_enable_int8 = false;
+  size_t trt_max_batch_size = 1;
+  size_t trt_max_workspace_size = 1 << 30;
+  // ======Only for PaddleTrt Backend=======
+  std::vector<std::string> trt_disabled_ops_{};
+
+  // ======Only for Poros Backend=======
+  bool is_dynamic = false;
+  bool long_to_int = true;
+  bool use_nvidia_tf32 = false;
+  int unconst_ops_thres = -1;
+  std::string poros_file = "";
+
+  // ======Only for OpenVINO Backend=======
+  int ov_num_streams = 0;
+  std::string openvino_device = "CPU";
+  std::map<std::string, std::vector<int64_t>> ov_shape_infos;
+  std::vector<std::string> ov_cpu_operators;
+
+  // ======Only for RKNPU2 Backend=======
+  fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ =
+      fastdeploy::rknpu2::CpuName::RK3588;
+  fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ =
+      fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
+
+  // ======Only for KunlunXin XPU Backend=======
+  int kunlunxin_l3_workspace_size = 0xfffc00;
+  bool kunlunxin_locked = false;
+  bool kunlunxin_autotune = true;
+  std::string kunlunxin_autotune_file = "";
+  std::string kunlunxin_precision = "int16";
+  bool kunlunxin_adaptive_seqlen = false;
+  bool kunlunxin_enable_multi_stream = false;
+
+  std::string model_file = "";   // Path of model file
+  std::string params_file = "";  // Path of parameters file, can be empty
+  // format of input model
+  ModelFormat model_format = ModelFormat::PADDLE;
+
+  std::string model_buffer_ = "";
+  std::string params_buffer_ = "";
+  size_t model_buffer_size_ = 0;
+  size_t params_buffer_size_ = 0;
+  bool model_from_memory_ = false;
+};
+
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc
index cb6a460c8..1c93ef5b4 100755
--- a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc
+++ b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc
@@ -75,14 +75,14 @@ YOLOv7End2EndTRT::YOLOv7End2EndTRT(const std::string& model_file,
   runtime_option.model_format = model_format;
   runtime_option.model_file = model_file;
   if (runtime_option.device != Device::GPU) {
-    FDWARNING << Str(runtime_option.device)
+    FDWARNING << runtime_option.device
               << " is not support for YOLOv7End2EndTRT,"
               << "will fallback to Device::GPU." << std::endl;
     runtime_option.device = Device::GPU;
   }
   if (runtime_option.backend != Backend::UNKNOWN) {
     if (runtime_option.backend != Backend::TRT) {
-      FDWARNING << Str(runtime_option.backend)
+      FDWARNING << runtime_option.backend
                 << " is not support for YOLOv7End2EndTRT,"
                 << "will fallback to Backend::TRT." << std::endl;
       runtime_option.backend = Backend::TRT;
@@ -347,4 +347,4 @@ bool YOLOv7End2EndTRT::Predict(cv::Mat* im, DetectionResult* result,
 } // namespace detection
 } // namespace vision
-} // namespace fastdeploy
\ No newline at end of file
+} // namespace fastdeploy
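Reviewer note (not part of the diff): a minimal sketch of how the RuntimeOption API gathered in runtime_option.h is typically driven. The model path, input name, shapes, and cache file name are illustrative only, and it assumes the new fastdeploy/runtime/runtime.h keeps exposing Runtime::Init with its pre-refactor signature.

    #include "fastdeploy/runtime/runtime.h"

    int main() {
      fastdeploy::RuntimeOption option;
      // Paddle format models need both the model file and the params file.
      option.SetModelPath("ResNet50/model.pdmodel", "ResNet50/model.pdiparams",
                          fastdeploy::ModelFormat::PADDLE);
      option.UseGpu(0);        // Falls back to CPU when built without WITH_GPU.
      option.UseTrtBackend();  // Requires ENABLE_TRT_BACKEND=ON.
      option.EnableTrtFP16();
      // With only min_shape given, opt_shape and max_shape default to min_shape.
      option.SetTrtInputShape("inputs", {1, 3, 224, 224});
      option.SetTrtCacheFile("resnet50.trt");  // Reuse the serialized engine.

      fastdeploy::Runtime runtime;
      return runtime.Init(option) ? 0 : -1;  // Init is assumed unchanged by this patch.
    }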
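A second sketch for the SetModelBuffer / model_from_memory_ path added in runtime_option.cc above, loading an ONNX model that is already held in memory; the file read is only a stand-in for however the buffer is actually obtained.

    #include <fstream>
    #include <iterator>
    #include <string>

    #include "fastdeploy/runtime/runtime_option.h"

    int main() {
      // Read the model into a memory buffer (illustrative source of the bytes).
      std::ifstream fin("model.onnx", std::ios::binary);
      std::string buf((std::istreambuf_iterator<char>(fin)),
                      std::istreambuf_iterator<char>());

      fastdeploy::RuntimeOption option;
      // ONNX carries its weights inside the model buffer, so no params buffer is passed.
      option.SetModelBuffer(buf.data(), buf.size(), nullptr, 0,
                            fastdeploy::ModelFormat::ONNX);
      option.UseCpu();
      option.UseOrtBackend();  // Requires ENABLE_ORT_BACKEND=ON.
      return 0;
    }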