[Other] Upgrade runtime module (#1068)

* Upgrade runtime module

* Update option.h

* Fix build error

* Move enumerations

* Minor modifications

* Remove some useless flags
Authored by Jason on 2023-01-06 13:44:05 +08:00, committed by GitHub
parent 1135d33dd7
commit d7a65e5c70
31 changed files with 1838 additions and 1778 deletions


@@ -71,15 +71,12 @@ option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF)
option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF)
option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF)
option(WITH_TESTING "Whether to compile with unittest." OFF)
############################# Options for Android cross compiling #########################
option(WITH_OPENCV_STATIC "Use OpenCV static lib for Android." OFF)
option(WITH_LITE_STATIC "Use Paddle Lite static lib for Android." OFF)
option(WITH_OPENMP "Use OpenMP support for Android." OFF)
-# Please don't open this flag now, some bugs exists.
-# Only support Linux Now
-# option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF)
# Whether to build fastdeploy with vision/text/... examples, only for testings.
option(BUILD_EXAMPLES "Whether to build fastdeploy with vision examples" OFF)
@@ -187,7 +184,6 @@ add_definitions(-DFASTDEPLOY_LIB)
configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h)
configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc)
file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc)
-file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc)
file(GLOB_RECURSE FDTENSOR_FUNC_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cu)
file(GLOB_RECURSE DEPLOY_OP_CUDA_KERNEL_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/op_cuda_kernels/*.cu)
file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc)
@@ -195,7 +191,7 @@ file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fas
file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
-file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu/rknpu2/*.cc)
+file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu2/*.cc)
file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/sophgo/*.cc)
file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
@@ -420,15 +416,6 @@ endif()
if(ENABLE_VISION)
  add_definitions(-DENABLE_VISION)
  add_definitions(-DENABLE_VISION_VISUALIZE)
-  if(ENABLE_OPENCV_CUDA)
-    if(NOT WITH_GPU)
-      message(FATAL_ERROR "ENABLE_OPENCV_CUDA is available on Linux and WITH_GPU=ON, but now WITH_GPU=OFF.")
-    endif()
-    if(APPLE OR ANDROID OR IOS OR WIN32)
-      message(FATAL_ERROR "Cannot enable opencv with cuda in mac/ios/android/windows os, please set -DENABLE_OPENCV_CUDA=OFF.")
-    endif()
-    add_definitions(-DENABLE_OPENCV_CUDA)
-  endif()
  add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp)
  list(APPEND DEPEND_LIBS yaml-cpp)
  if(BUILD_CUDA_SRC)


@@ -20,7 +20,6 @@ set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@)
set(OPENVINO_VERSION @OPENVINO_VERSION@)
set(WITH_LITE_STATIC @WITH_LITE_STATIC@)
set(WITH_OPENCV_STATIC @WITH_OPENCV_STATIC@)
-# set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@)
set(OPENCV_FILENAME @OPENCV_FILENAME@)
set(OPENVINO_FILENAME @OPENVINO_FILENAME@)
set(PADDLELITE_FILENAME @PADDLELITE_FILENAME@)


@@ -42,12 +42,6 @@ else()
  if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
    set(OPENCV_FILENAME "opencv-linux-aarch64-3.4.14")
  endif()
-  if(ENABLE_OPENCV_CUDA)
-    if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
-      message(FATAL_ERROR "Cannot set ENABLE_OPENCV_CUDA=ON while in linux-aarch64 platform.")
-    endif()
-    set(OPENCV_FILENAME "opencv-linux-x64-gpu-3.4.16")
-  endif()
endif()
if(NOT OPENCV_FILENAME)


@@ -29,11 +29,6 @@ if(${WITH_GPU})
  set(WITH_GPU OFF)
endif()
-if(${ENABLE_OPENCV_CUDA})
-  message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_OPENCV_CUDA=OFF")
-  set(ENABLE_OPENCV_CUDA OFF)
-endif()
if(${ENABLE_TEXT})
  set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE)
  message(STATUS "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_TEXT=OFF")


@@ -14,6 +14,7 @@
#pragma once
+#include "fastdeploy/core/fd_type.h"
#include <iostream>
#include <memory>
#include <string>
@@ -21,6 +22,16 @@
#include <map>
namespace fastdeploy {
+/*! Paddle Lite power mode for mobile device. */
+enum LitePowerMode {
+  LITE_POWER_HIGH = 0,       ///< Use Lite Backend with high power mode
+  LITE_POWER_LOW = 1,        ///< Use Lite Backend with low power mode
+  LITE_POWER_FULL = 2,       ///< Use Lite Backend with full power mode
+  LITE_POWER_NO_BIND = 3,    ///< Use Lite Backend with no bind power mode
+  LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high mode
+  LITE_POWER_RAND_LOW = 5    ///< Use Lite Backend with rand low power mode
+};
struct LiteBackendOption {
  // cpu num threads
  int threads = 1;
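
After the move, callers keep reaching this enum through RuntimeOption::SetLitePowerMode, documented later in this diff. A minimal sketch of selecting a power mode, assuming the fastdeploy/runtime.h umbrella header and placeholder model paths:

#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // placeholder paths
  option.UseLiteBackend();
  // LITE_POWER_NO_BIND is the documented default; request high power mode.
  option.SetLitePowerMode(fastdeploy::LitePowerMode::LITE_POWER_HIGH);
  fastdeploy::Runtime runtime;
  return runtime.Init(option) ? 0 : 1;
}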


@@ -14,6 +14,7 @@
#pragma once
+#include "fastdeploy/core/fd_type.h"
#include <iostream>
#include <memory>
#include <string>


@@ -14,6 +14,7 @@
#pragma once
+#include "fastdeploy/core/fd_type.h"
#include <iostream>
#include <memory>
#include <string>


@@ -14,6 +14,7 @@
#pragma once
+#include "fastdeploy/core/fd_type.h"
#include <iostream>
#include <memory>
#include <string>


@@ -31,6 +31,8 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
  config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
  auto precision = paddle_infer::PrecisionType::kFloat32;
  if (option.trt_option.enable_fp16) {
+    FDINFO << "Will try to use tensorrt fp16 inference with Paddle Backend."
+           << std::endl;
    precision = paddle_infer::PrecisionType::kHalf;
  }
  bool use_static = false;
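
The new log line fires on the TensorRT-FP16 path of the Paddle Inference backend. For reference, a hedged sketch of the option combination that reaches it, using only interfaces declared in the runtime header shown later in this diff (model paths are placeholders):

#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // placeholder paths
  option.UseGpu(0);
  option.UsePaddleInferBackend();
  option.EnablePaddleToTrt();  // use Paddle Inference's integrated TensorRT
  option.EnableTrtFP16();      // triggers the FDINFO message above during Init
  fastdeploy::Runtime runtime;
  return runtime.Init(option) ? 0 : 1;
}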


@@ -14,6 +14,7 @@
#pragma once
+#include "fastdeploy/core/fd_type.h"
#include <iostream>
#include <memory>
#include <string>


@@ -11,7 +11,7 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
+#include "fastdeploy/backends/rknpu2/rknpu2_backend.h"
#include "fastdeploy/utils/perf.h"
namespace fastdeploy {
RKNPU2Backend::~RKNPU2Backend() {
@@ -478,4 +478,4 @@ RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) {
FDERROR << "rknn_tensor_type don't support this type" << std::endl; FDERROR << "rknn_tensor_type don't support this type" << std::endl;
return RKNN_TENSOR_TYPE_MAX; return RKNN_TENSOR_TYPE_MAX;
} }
} // namespace fastdeploy } // namespace fastdeploy


@@ -14,7 +14,7 @@
#pragma once
#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h"
+#include "fastdeploy/backends/rknpu2/option.h"
#include "fastdeploy/core/fd_tensor.h"
#include "rknn_api.h"  // NOLINT
#include <cstring>


@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
+#include "fastdeploy/core/fd_type.h"
#include <cstring>
#include <iostream>
#include <memory>


@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
+#include "fastdeploy/core/fd_type.h"
#include <iostream>
#include <map>
#include <string>


@@ -57,10 +57,6 @@
#cmakedefine ENABLE_TEXT
#endif
-#ifndef ENABLE_OPENCV_CUDA
-#cmakedefine ENABLE_OPENCV_CUDA
-#endif
#ifdef ENABLE_VISION
#ifndef ENABLE_VISION_VISUALIZE
#define ENABLE_VISION_VISUALIZE


@@ -21,11 +21,11 @@
#include "fastdeploy/core/allocate.h" #include "fastdeploy/core/allocate.h"
#include "fastdeploy/core/fd_scalar.h" #include "fastdeploy/core/fd_scalar.h"
#include "fastdeploy/core/fd_type.h" #include "fastdeploy/core/fd_type.h"
#include "fastdeploy/runtime/enum_variables.h"
namespace fastdeploy { namespace fastdeploy {
struct FASTDEPLOY_DECL FDTensor { struct FASTDEPLOY_DECL FDTensor {
// std::vector<int8_t> data; // std::vector<int8_t> data;
void* buffer_ = nullptr; void* buffer_ = nullptr;
std::vector<int64_t> shape = {0}; std::vector<int64_t> shape = {0};

fastdeploy/core/fd_type.cc (155 lines changed; Executable file → Normal file)

@@ -44,70 +44,6 @@ int FDDataTypeSize(const FDDataType& data_type) {
  return -1;
}
-std::string Str(const Device& d) {
-  std::string out;
-  switch (d) {
-    case Device::CPU:
-      out = "Device::CPU";
-      break;
-    case Device::GPU:
-      out = "Device::GPU";
-      break;
-    case Device::RKNPU:
-      out = "Device::RKNPU";
-      break;
-    case Device::SOPHGOTPUD:
-      out = "Device::SOPHGOTPUD";
-      break;
-    case Device::IPU:
-      out = "Device::IPU";
-      break;
-    case Device::TIMVX:
-      out = "Device::TIMVX";
-      break;
-    case Device::ASCEND:
-      out = "Device::ASCEND";
-      break;
-    case Device::KUNLUNXIN:
-      out = "Device::KUNLUNXIN";
-      break;
-    default:
-      out = "Device::UNKOWN";
-  }
-  return out;
-}
-std::ostream& operator<<(std::ostream& out,const Device& d){
-  switch (d) {
-    case Device::CPU:
-      out << "Device::CPU";
-      break;
-    case Device::GPU:
-      out << "Device::GPU";
-      break;
-    case Device::RKNPU:
-      out << "Device::RKNPU";
-      break;
-    case Device::SOPHGOTPUD:
-      out << "Device::SOPHGOTPUD";
-      break;
-    case Device::TIMVX:
-      out << "Device::TIMVX";
-      break;
-    case Device::KUNLUNXIN:
-      out << "Device::KUNLUNXIN";
-      break;
-    case Device::ASCEND:
-      out << "Device::ASCEND";
-      break;
-    default:
-      out << "Device::UNKOWN";
-  }
-  return out;
-}
std::string Str(const FDDataType& fdt) {
  std::string out;
  switch (fdt) {
@@ -144,37 +80,37 @@ std::string Str(const FDDataType& fdt) {
  return out;
}
-std::ostream& operator<<(std::ostream& out,const FDDataType& fdt){
+std::ostream& operator<<(std::ostream& out, const FDDataType& fdt) {
  switch (fdt) {
    case FDDataType::BOOL:
      out << "FDDataType::BOOL";
      break;
    case FDDataType::INT16:
      out << "FDDataType::INT16";
      break;
    case FDDataType::INT32:
      out << "FDDataType::INT32";
      break;
    case FDDataType::INT64:
      out << "FDDataType::INT64";
      break;
    case FDDataType::FP32:
      out << "FDDataType::FP32";
      break;
    case FDDataType::FP64:
      out << "FDDataType::FP64";
      break;
    case FDDataType::FP16:
      out << "FDDataType::FP16";
      break;
    case FDDataType::UINT8:
      out << "FDDataType::UINT8";
      break;
    case FDDataType::INT8:
      out << "FDDataType::INT8";
      break;
    default:
      out << "FDDataType::UNKNOWN";
  }
  return out;
}
@@ -206,35 +142,4 @@ const FDDataType TypeToDataType<uint8_t>::dtype = UINT8;
template <>
const FDDataType TypeToDataType<int8_t>::dtype = INT8;
-std::string Str(const ModelFormat& f) {
-  if (f == ModelFormat::PADDLE) {
-    return "ModelFormat::PADDLE";
-  } else if (f == ModelFormat::ONNX) {
-    return "ModelFormat::ONNX";
-  } else if (f == ModelFormat::RKNN) {
-    return "ModelFormat::RKNN";
-  } else if (f == ModelFormat::SOPHGO) {
-    return "ModelFormat::SOPHGO";
-  } else if (f == ModelFormat::TORCHSCRIPT) {
-    return "ModelFormat::TORCHSCRIPT";
-  }
-  return "UNKNOWN-ModelFormat";
-}
-std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
-  if (format == ModelFormat::PADDLE) {
-    out << "ModelFormat::PADDLE";
-  } else if (format == ModelFormat::ONNX) {
-    out << "ModelFormat::ONNX";
-  } else if (format == ModelFormat::RKNN) {
-    out << "ModelFormat::RKNN";
-  } else if (format == ModelFormat::SOPHGO) {
-    out << "ModelFormat::SOPHGO";
-  } else if (format == ModelFormat::TORCHSCRIPT) {
-    out << "ModelFormat::TORCHSCRIPT";
-  }
-  out << "UNKNOWN-ModelFormat";
-  return out;
-}
} // namespace fastdeploy


@@ -22,11 +22,6 @@
namespace fastdeploy {
-enum FASTDEPLOY_DECL Device {CPU, GPU, RKNPU, IPU, TIMVX, KUNLUNXIN, ASCEND,
-                             SOPHGOTPUD};
-FASTDEPLOY_DECL std::string Str(const Device& d);
enum FASTDEPLOY_DECL FDDataType {
  BOOL,
  INT16,
@@ -52,7 +47,6 @@ enum FASTDEPLOY_DECL FDDataType {
  INT8
};
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d);
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
                                         const FDDataType& fdt);
@@ -66,17 +60,4 @@ struct FASTDEPLOY_DECL TypeToDataType {
  static const FDDataType dtype;
};
-/*! Deep learning model format */
-enum ModelFormat {
-  AUTOREC,      ///< Auto recognize the model format by model file name
-  PADDLE,       ///< Model with paddlepaddle format
-  ONNX,         ///< Model with ONNX format
-  RKNN,         ///< Model with RKNN format
-  TORCHSCRIPT,  ///< Model with TorchScript format
-  SOPHGO,       ///< Model with SOPHGO format
-};
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
-                                         const ModelFormat& format);
} // namespace fastdeploy


@@ -121,9 +121,7 @@ class FASTDEPLOY_DECL FastDeployModel {
    std::vector<FDTensor>().swap(reused_output_tensors_);
  }
-  virtual fastdeploy::Runtime* CloneRuntime() {
-    return runtime_->Clone();
-  }
+  virtual fastdeploy::Runtime* CloneRuntime() { return runtime_->Clone(); }
  virtual bool SetRuntime(fastdeploy::Runtime* clone_runtime) {
    runtime_ = std::unique_ptr<Runtime>(clone_runtime);
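
CloneRuntime and SetRuntime pair with Runtime::Clone (documented in the runtime header later in this diff) so several model instances can serve one set of loaded weights. A rough sketch of the hand-off, assuming fastdeploy/fastdeploy_model.h is the header being patched here:

#include "fastdeploy/fastdeploy_model.h"

// Give `dst` its own Runtime cloned from `src`, so the two instances can
// infer concurrently while sharing the already-loaded model.
bool ShareRuntime(fastdeploy::FastDeployModel& src,
                  fastdeploy::FastDeployModel& dst) {
  fastdeploy::Runtime* cloned = src.CloneRuntime();
  if (cloned == nullptr) return false;
  return dst.SetRuntime(cloned);  // dst takes ownership via unique_ptr
}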


@@ -21,7 +21,7 @@
#include <type_traits>
-#include "fastdeploy/runtime.h"
+#include "fastdeploy/runtime/runtime.h"
#ifdef ENABLE_VISION
#include "fastdeploy/vision.h"


@@ -11,23 +11,27 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h"
+#include "fastdeploy/backends/rknpu2/option.h"
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindRKNPU2Config(pybind11::module& m) {
-  pybind11::enum_<fastdeploy::rknpu2::CpuName>(m, "CpuName", pybind11::arithmetic(),
-                                               "CpuName for inference.")
+  pybind11::enum_<fastdeploy::rknpu2::CpuName>(
+      m, "CpuName", pybind11::arithmetic(), "CpuName for inference.")
      .value("RK356X", fastdeploy::rknpu2::CpuName::RK356X)
      .value("RK3588", fastdeploy::rknpu2::CpuName::RK3588)
      .value("UNDEFINED", fastdeploy::rknpu2::CpuName::UNDEFINED);
-  pybind11::enum_<fastdeploy::rknpu2::CoreMask>(m, "CoreMask", pybind11::arithmetic(),
-                                                "CoreMask for inference.")
+  pybind11::enum_<fastdeploy::rknpu2::CoreMask>(
+      m, "CoreMask", pybind11::arithmetic(), "CoreMask for inference.")
-      .value("RKNN_NPU_CORE_AUTO", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO)
+      .value("RKNN_NPU_CORE_AUTO",
+             fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO)
      .value("RKNN_NPU_CORE_0", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0)
      .value("RKNN_NPU_CORE_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_1)
      .value("RKNN_NPU_CORE_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_2)
-      .value("RKNN_NPU_CORE_0_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1)
-      .value("RKNN_NPU_CORE_0_1_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2)
-      .value("RKNN_NPU_CORE_UNDEFINED", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED);
+      .value("RKNN_NPU_CORE_0_1",
+             fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1)
+      .value("RKNN_NPU_CORE_0_1_2",
+             fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2)
+      .value("RKNN_NPU_CORE_UNDEFINED",
+             fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED);
}
} // namespace fastdeploy
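
These Python bindings mirror the C++ configuration path, where the same enums feed RuntimeOption::UseRKNPU2 (declared in the runtime header later in this diff). A hedged C++ equivalent of what the bound values configure:

#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  // "model.rknn" is a placeholder; RKNN models carry no separate params file.
  option.SetModelPath("model.rknn", "", fastdeploy::ModelFormat::RKNN);
  // Pin inference to NPU core 0 of an RK3588.
  option.UseRKNPU2(fastdeploy::rknpu2::CpuName::RK3588,
                   fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
  fastdeploy::Runtime runtime;
  return runtime.Init(option) ? 0 : 1;
}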

File diff suppressed because it is too large.


@@ -19,573 +19,5 @@
 */
#pragma once
-#include <algorithm>
-#include <map>
-#include <vector>
-#include "backends/rknpu/rknpu2/rknpu2_config.h"
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/utils/perf.h"
+#include "fastdeploy/core/config.h"
+#include "fastdeploy/runtime/runtime.h"
/** \brief All C++ FastDeploy APIs are defined inside this namespace
*
*/
namespace fastdeploy {
/*! Inference backend supported in FastDeploy */
enum Backend {
UNKNOWN, ///< Unknown inference backend
ORT, ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU
OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only
RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only
SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only
};
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
const Backend& backend);
/*! Paddle Lite power mode for mobile device. */
enum LitePowerMode {
LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode
LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode
LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode
LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode
LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode
LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode
};
FASTDEPLOY_DECL std::string Str(const Backend& b);
FASTDEPLOY_DECL std::string Str(const ModelFormat& f);
/**
* @brief Get all the available inference backend in FastDeploy
*/
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
/**
* @brief Check if the inference backend available
*/
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
bool CheckModelFormat(const std::string& model_file,
const ModelFormat& model_format);
ModelFormat GuessModelFormat(const std::string& model_file);
/*! @brief Option object used when create a new Runtime object
*/
struct FASTDEPLOY_DECL RuntimeOption {
/** \brief Set path of model file and parameter file
*
* \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model
* \param[in] params_path Path of parameter file, this only used when the model format is Paddle, e.g Resnet50/model.pdiparams
* \param[in] format Format of the loaded model
*/
void SetModelPath(const std::string& model_path,
const std::string& params_path = "",
const ModelFormat& format = ModelFormat::PADDLE);
/** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
*
* \param[in] model_buffer The memory buffer of model
* \param[in] model_buffer_size The size of the model data
* \param[in] params_buffer The memory buffer of the combined parameters file
* \param[in] params_buffer_size The size of the combined parameters data
* \param[in] format Format of the loaded model
*/
void SetModelBuffer(const char * model_buffer,
size_t model_buffer_size,
const char * params_buffer,
size_t params_buffer_size,
const ModelFormat& format = ModelFormat::PADDLE);
/// Use cpu to inference, the runtime will inference on CPU by default
void UseCpu();
/// Use Nvidia GPU to inference
void UseGpu(int gpu_id = 0);
void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name =
fastdeploy::rknpu2::CpuName::RK3588,
fastdeploy::rknpu2::CoreMask rknpu2_core =
fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
/// Use TimVX to inference
void UseTimVX();
/// Use Huawei Ascend to inference
void UseAscend();
///
/// \brief Turn on KunlunXin XPU.
///
/// \param kunlunxin_id the KunlunXin XPU card to use (default is 0).
/// \param l3_workspace_size The size of the video memory allocated by the l3
/// cache, the maximum is 16M.
/// \param locked Whether the allocated L3 cache can be locked. If false,
/// it means that the L3 cache is not locked, and the allocated L3
/// cache can be shared by multiple models, and multiple models
/// sharing the L3 cache will be executed sequentially on the card.
/// \param autotune Whether to autotune the conv operator in the model. If
/// true, when the conv operator of a certain dimension is executed
/// for the first time, it will automatically search for a better
/// algorithm to improve the performance of subsequent conv operators
/// of the same dimension.
/// \param autotune_file Specify the path of the autotune file. If
/// autotune_file is specified, the algorithm specified in the
/// file will be used and autotune will not be performed again.
/// \param precision Calculation accuracy of multi_encoder
/// \param adaptive_seqlen Is the input of multi_encoder variable length
/// \param enable_multi_stream Whether to enable the multi stream of
/// KunlunXin XPU.
///
void UseKunlunXin(int kunlunxin_id = 0,
int l3_workspace_size = 0xfffc00,
bool locked = false,
bool autotune = true,
const std::string& autotune_file = "",
const std::string& precision = "int16",
bool adaptive_seqlen = false,
bool enable_multi_stream = false);
/// Use Sophgo to inference
void UseSophgo();
void SetExternalStream(void* external_stream);
/*
* @brief Set number of cpu threads while inference on CPU, by default it will decided by the different backends
*/
void SetCpuThreadNum(int thread_num);
/// Set ORT graph opt level, default is decide by ONNX Runtime itself
void SetOrtGraphOptLevel(int level = -1);
/// Set Paddle Inference as inference backend, support CPU/GPU
void UsePaddleBackend();
/// Wrapper function of UsePaddleBackend()
void UsePaddleInferBackend() { return UsePaddleBackend(); }
/// Set ONNX Runtime as inference backend, support CPU/GPU
void UseOrtBackend();
/// Set SOPHGO Runtime as inference backend, support CPU/GPU
void UseSophgoBackend();
/// Set TensorRT as inference backend, only support GPU
void UseTrtBackend();
/// Set Poros backend as inference backend, support CPU/GPU
void UsePorosBackend();
/// Set OpenVINO as inference backend, only support CPU
void UseOpenVINOBackend();
/// Set Paddle Lite as inference backend, only support arm cpu
void UseLiteBackend();
/// Wrapper function of UseLiteBackend()
void UsePaddleLiteBackend() { return UseLiteBackend(); }
/// Set mkldnn switch while using Paddle Inference as inference backend
void SetPaddleMKLDNN(bool pd_mkldnn = true);
/*
* @brief If TensorRT backend is used, EnablePaddleToTrt will change to use Paddle Inference backend, and use its integrated TensorRT instead.
*/
void EnablePaddleToTrt();
/**
* @brief Delete pass by name while using Paddle Inference as inference backend, this can be called multiple times to delete a set of passes
*/
void DeletePaddleBackendPass(const std::string& delete_pass_name);
/**
* @brief Enable print debug information while using Paddle Inference as inference backend, the backend disable the debug information by default
*/
void EnablePaddleLogInfo();
/**
* @brief Disable print debug information while using Paddle Inference as inference backend
*/
void DisablePaddleLogInfo();
/**
* @brief Set shape cache size while using Paddle Inference with mkldnn, by default it will cache all the difference shape
*/
void SetPaddleMKLDNNCacheSize(int size);
/**
* @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'....
*/
void SetOpenVINODevice(const std::string& name = "CPU");
/**
* @brief Set shape info for OpenVINO
*/
void SetOpenVINOShapeInfo(
const std::map<std::string, std::vector<int64_t>>& shape_info) {
ov_shape_infos = shape_info;
}
/**
* @brief While use OpenVINO backend with intel GPU, use this interface to specify operators run on CPU
*/
void SetOpenVINOCpuOperators(const std::vector<std::string>& operators) {
ov_cpu_operators = operators;
}
/**
* @brief Set optimzed model dir for Paddle Lite backend.
*/
void SetLiteOptimizedModelDir(const std::string& optimized_model_dir);
/**
* @brief Set subgraph partition path for Paddle Lite backend.
*/
void SetLiteSubgraphPartitionPath(
const std::string& nnadapter_subgraph_partition_config_path);
/**
* @brief Set subgraph partition path for Paddle Lite backend.
*/
void SetLiteSubgraphPartitionConfigBuffer(
const std::string& nnadapter_subgraph_partition_config_buffer);
/**
* @brief Set device name for Paddle Lite backend.
*/
void SetLiteDeviceNames(
const std::vector<std::string>& nnadapter_device_names);
/**
* @brief Set context properties for Paddle Lite backend.
*/
void SetLiteContextProperties(
const std::string& nnadapter_context_properties);
/**
* @brief Set model cache dir for Paddle Lite backend.
*/
void SetLiteModelCacheDir(
const std::string& nnadapter_model_cache_dir);
/**
* @brief Set dynamic shape info for Paddle Lite backend.
*/
void SetLiteDynamicShapeInfo(
const std::map<std::string, std::vector<std::vector<int64_t>>>&
nnadapter_dynamic_shape_info);
/**
* @brief Set mixed precision quantization config path for Paddle Lite backend.
*/
void SetLiteMixedPrecisionQuantizationConfigPath(
const std::string& nnadapter_mixed_precision_quantization_config_path);
/**
* @brief enable half precision while use paddle lite backend
*/
void EnableLiteFP16();
/**
* @brief disable half precision, change to full precision(float32)
*/
void DisableLiteFP16();
/**
* @brief enable int8 precision while use paddle lite backend
*/
void EnableLiteInt8();
/**
* @brief disable int8 precision, change to full precision(float32)
*/
void DisableLiteInt8();
/**
* @brief Set power mode while using Paddle Lite as inference backend, mode(0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND, 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW, refer [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details)
*/
void SetLitePowerMode(LitePowerMode mode);
/** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend
*
* \param[in] input_name The name of input for the model which is dynamic shape
* \param[in] min_shape The minimal shape for the input tensor
* \param[in] opt_shape The optimized shape for the input tensor, just set the most common shape, if set as default value, it will keep same with min_shape
* \param[in] max_shape The maximum shape for the input tensor, if set as default value, it will keep same with min_shape
*/
void SetTrtInputShape(
const std::string& input_name, const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
const std::vector<int32_t>& max_shape = std::vector<int32_t>());
/// Set max_workspace_size for TensorRT, default 1<<30
void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
/// Set max_batch_size for TensorRT, default 32
void SetTrtMaxBatchSize(size_t max_batch_size);
/**
* @brief Enable FP16 inference while using TensorRT backend. Notice: not all the GPU device support FP16, on those device doesn't support FP16, FastDeploy will fallback to FP32 automaticly
*/
void EnableTrtFP16();
/// Disable FP16 inference while using TensorRT backend
void DisableTrtFP16();
/**
* @brief Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again
*/
void SetTrtCacheFile(const std::string& cache_file_path);
/**
* @brief Enable pinned memory. Pinned memory can be utilized to speedup the data transfer between CPU and GPU. Currently it's only suppurted in TRT backend and Paddle Inference backend.
*/
void EnablePinnedMemory();
/**
* @brief Disable pinned memory
*/
void DisablePinnedMemory();
/**
* @brief Enable to collect shape in paddle trt backend
*/
void EnablePaddleTrtCollectShape();
/**
* @brief Disable to collect shape in paddle trt backend
*/
void DisablePaddleTrtCollectShape();
/**
* @brief Prevent ops running in paddle trt backend
*/
void DisablePaddleTrtOPs(const std::vector<std::string>& ops);
/*
* @brief Set number of streams by the OpenVINO backends
*/
void SetOpenVINOStreams(int num_streams);
/** \Use Graphcore IPU to inference.
*
* \param[in] device_num the number of IPUs.
* \param[in] micro_batch_size the batch size in the graph, only work when graph has no batch shape info.
* \param[in] enable_pipelining enable pipelining.
* \param[in] batches_per_step the number of batches per run in pipelining.
*/
void UseIpu(int device_num = 1, int micro_batch_size = 1,
bool enable_pipelining = false, int batches_per_step = 1);
/** \brief Set IPU config.
*
* \param[in] enable_fp16 enable fp16.
* \param[in] replica_num the number of graph replication.
* \param[in] available_memory_proportion the available memory proportion for matmul/conv.
* \param[in] enable_half_partial enable fp16 partial for matmul, only work with fp16.
*/
void SetIpuConfig(bool enable_fp16 = false, int replica_num = 1,
float available_memory_proportion = 1.0,
bool enable_half_partial = false);
Backend backend = Backend::UNKNOWN;
// for cpu inference and preprocess
// default will let the backend choose their own default value
int cpu_thread_num = -1;
int device_id = 0;
Device device = Device::CPU;
void* external_stream_ = nullptr;
bool enable_pinned_memory = false;
// ======Only for ORT Backend========
// -1 means use default value by ort
// 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
// ORT_ENABLE_ALL
int ort_graph_opt_level = -1;
int ort_inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
int ort_execution_mode = -1;
// ======Only for Paddle Backend=====
bool pd_enable_mkldnn = true;
bool pd_enable_log_info = false;
bool pd_enable_trt = false;
bool pd_collect_shape = false;
int pd_mkldnn_cache_size = 1;
std::vector<std::string> pd_delete_pass_names;
// ======Only for Paddle IPU Backend =======
int ipu_device_num = 1;
int ipu_micro_batch_size = 1;
bool ipu_enable_pipelining = false;
int ipu_batches_per_step = 1;
bool ipu_enable_fp16 = false;
int ipu_replica_num = 1;
float ipu_available_memory_proportion = 1.0;
bool ipu_enable_half_partial = false;
// ======Only for Paddle Lite Backend=====
// 0: LITE_POWER_HIGH 1: LITE_POWER_LOW 2: LITE_POWER_FULL
// 3: LITE_POWER_NO_BIND 4: LITE_POWER_RAND_HIGH
// 5: LITE_POWER_RAND_LOW
LitePowerMode lite_power_mode = LitePowerMode::LITE_POWER_NO_BIND;
// enable int8 or not
bool lite_enable_int8 = false;
// enable fp16 or not
bool lite_enable_fp16 = false;
// optimized model dir for CxxConfig
std::string lite_optimized_model_dir = "";
std::string lite_nnadapter_subgraph_partition_config_path = "";
// and other nnadapter settings for CxxConfig
std::string lite_nnadapter_subgraph_partition_config_buffer = "";
std::string lite_nnadapter_context_properties = "";
std::string lite_nnadapter_model_cache_dir = "";
std::string lite_nnadapter_mixed_precision_quantization_config_path = "";
std::map<std::string, std::vector<std::vector<int64_t>>>
lite_nnadapter_dynamic_shape_info = {{"", {{0}}}};
std::vector<std::string> lite_nnadapter_device_names = {};
bool enable_timvx = false;
bool enable_ascend = false;
bool enable_kunlunxin = false;
// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_max_shape;
std::map<std::string, std::vector<int32_t>> trt_min_shape;
std::map<std::string, std::vector<int32_t>> trt_opt_shape;
std::string trt_serialize_file = "";
bool trt_enable_fp16 = false;
bool trt_enable_int8 = false;
size_t trt_max_batch_size = 1;
size_t trt_max_workspace_size = 1 << 30;
// ======Only for PaddleTrt Backend=======
std::vector<std::string> trt_disabled_ops_{};
// ======Only for Poros Backend=======
bool is_dynamic = false;
bool long_to_int = true;
bool use_nvidia_tf32 = false;
int unconst_ops_thres = -1;
std::string poros_file = "";
// ======Only for OpenVINO Backend=======
int ov_num_streams = 0;
std::string openvino_device = "CPU";
std::map<std::string, std::vector<int64_t>> ov_shape_infos;
std::vector<std::string> ov_cpu_operators;
// ======Only for RKNPU2 Backend=======
fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ =
fastdeploy::rknpu2::CpuName::RK3588;
fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ =
fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
// ======Only for KunlunXin XPU Backend=======
int kunlunxin_l3_workspace_size = 0xfffc00;
bool kunlunxin_locked = false;
bool kunlunxin_autotune = true;
std::string kunlunxin_autotune_file = "";
std::string kunlunxin_precision = "int16";
bool kunlunxin_adaptive_seqlen = false;
bool kunlunxin_enable_multi_stream = false;
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
// format of input model
ModelFormat model_format = ModelFormat::AUTOREC;
std::string model_buffer_ = "";
std::string params_buffer_ = "";
size_t model_buffer_size_ = 0;
size_t params_buffer_size_ = 0;
bool model_from_memory_ = false;
};
/*! @brief Runtime object used to inference the loaded model on different devices
*/
struct FASTDEPLOY_DECL Runtime {
public:
/// Intialize a Runtime object with RuntimeOption
bool Init(const RuntimeOption& _option);
/** \brief Inference the model by the input data, and write to the output
*
* \param[in] input_tensors Notice the FDTensor::name should keep same with the model's input
* \param[in] output_tensors Inference results
* \return true if the inference successed, otherwise false
*/
bool Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors);
/** \brief No params inference the model.
*
* the input and output data need to pass through the BindInputTensor and GetOutputTensor interfaces.
*/
bool Infer();
/** \brief Compile TorchScript Module, only for Poros backend
*
* \param[in] prewarm_tensors Prewarm datas for compile
* \param[in] _option Runtime option
* \return true if compile successed, otherwise false
*/
bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option);
/** \brief Get number of inputs
*/
int NumInputs() { return backend_->NumInputs(); }
/** \brief Get number of outputs
*/
int NumOutputs() { return backend_->NumOutputs(); }
/** \brief Get input information by index
*/
TensorInfo GetInputInfo(int index);
/** \brief Get output information by index
*/
TensorInfo GetOutputInfo(int index);
/** \brief Get all the input information
*/
std::vector<TensorInfo> GetInputInfos();
/** \brief Get all the output information
*/
std::vector<TensorInfo> GetOutputInfos();
/** \brief Bind FDTensor by name, no copy and share input memory
*/
void BindInputTensor(const std::string& name, FDTensor& input);
/** \brief Get output FDTensor by name, no copy and share backend output memory
*/
FDTensor* GetOutputTensor(const std::string& name);
/** \brief Clone new Runtime when multiple instances of the same model are created
*
* \param[in] stream CUDA Stream, defualt param is nullptr
* \return new Runtime* by this clone
*/
Runtime* Clone(void* stream = nullptr, int device_id = -1);
RuntimeOption option;
private:
void CreateOrtBackend();
void CreatePaddleBackend();
void CreateTrtBackend();
void CreateOpenVINOBackend();
void CreateLiteBackend();
void CreateRKNPU2Backend();
void CreateSophgoNPUBackend();
std::unique_ptr<BaseBackend> backend_;
std::vector<FDTensor> input_tensors_;
std::vector<FDTensor> output_tensors_;
};
} // namespace fastdeploy
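
The header deleted above carried the full Runtime workflow documentation; after this refactor the same interfaces live under fastdeploy/runtime/. As a reference for readers of this diff, a hedged end-to-end sketch built only from calls documented in the removed header (the model paths and the tensor name "x" are placeholders):

#include <vector>
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // placeholder paths
  option.UseGpu(0);
  option.UseTrtBackend();
  // Dynamic-shape inputs need min/opt/max ranges before TensorRT can build.
  option.SetTrtInputShape("x", {1, 3, 224, 224}, {4, 3, 224, 224},
                          {8, 3, 224, 224});
  option.SetTrtCacheFile("trt.cache");  // reuse the built engine on later runs

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return 1;

  std::vector<fastdeploy::FDTensor> inputs(1), outputs;
  inputs[0].name = runtime.GetInputInfo(0).name;  // must match the model input
  // ... fill inputs[0] with real data before running ...
  return runtime.Infer(inputs, &outputs) ? 0 : 1;
}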


@@ -0,0 +1,85 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/runtime/enum_variables.h"
namespace fastdeploy {
std::ostream& operator<<(std::ostream& out, const Backend& backend) {
if (backend == Backend::ORT) {
out << "Backend::ORT";
} else if (backend == Backend::TRT) {
out << "Backend::TRT";
} else if (backend == Backend::PDINFER) {
out << "Backend::PDINFER";
} else if (backend == Backend::OPENVINO) {
out << "Backend::OPENVINO";
} else if (backend == Backend::RKNPU2) {
out << "Backend::RKNPU2";
} else if (backend == Backend::SOPHGOTPU) {
out << "Backend::SOPHGOTPU";
} else if (backend == Backend::POROS) {
out << "Backend::POROS";
} else if (backend == Backend::LITE) {
out << "Backend::PDLITE";
} else {
out << "UNKNOWN-Backend";
}
return out;
}
std::ostream& operator<<(std::ostream& out, const Device& d) {
switch (d) {
case Device::CPU:
out << "Device::CPU";
break;
case Device::GPU:
out << "Device::GPU";
break;
case Device::RKNPU:
out << "Device::RKNPU";
break;
case Device::SOPHGOTPUD:
out << "Device::SOPHGOTPUD";
break;
case Device::IPU:
out << "Device::IPU";
break;
case Device::TIMVX:
out << "Device::TIMVX";
break;
case Device::KUNLUNXIN:
out << "Device::KUNLUNXIN";
break;
case Device::ASCEND:
out << "Device::ASCEND";
break;
default:
out << "Device::UNKOWN";
}
return out;
}
std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
if (format == ModelFormat::PADDLE) {
out << "ModelFormat::PADDLE";
} else if (format == ModelFormat::ONNX) {
out << "ModelFormat::ONNX";
} else if (format == ModelFormat::RKNN) {
out << "ModelFormat::RKNN";
} else if (format == ModelFormat::SOPHGO) {
out << "ModelFormat::SOPHGO";
} else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT";
} else {
out << "UNKNOWN-ModelFormat";
}
return out;
}
} // namespace fastdeploy


@@ -0,0 +1,79 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*! \file enum_variables.h
    \brief Enumerations shared by the FastDeploy runtime: inference backends,
           devices and model formats, plus their stream operators.
*/
#pragma once
#include "fastdeploy/utils/utils.h"
#include <ostream>
#include <map>
namespace fastdeploy {
/*! Inference backend supported in FastDeploy */
enum Backend {
UNKNOWN, ///< Unknown inference backend
ORT, ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU
OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only
RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only
SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only
};
enum FASTDEPLOY_DECL Device {
CPU,
GPU,
RKNPU,
IPU,
TIMVX,
KUNLUNXIN,
ASCEND,
SOPHGOTPUD
};
/*! Deep learning model format */
enum ModelFormat {
AUTOREC, ///< Auto recognize the model format by model file name
PADDLE, ///< Model with paddlepaddle format
ONNX, ///< Model with ONNX format
RKNN, ///< Model with RKNN format
TORCHSCRIPT, ///< Model with TorchScript format
SOPHGO, ///< Model with SOPHGO format
};
/// Describe all the supported backends for a given model format
static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
{ModelFormat::PADDLE, {Backend::PDINFER, Backend::LITE,
Backend::ORT, Backend::OPENVINO, Backend::TRT}},
{ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}},
{ModelFormat::RKNN, {Backend::RKNPU2}},
{ModelFormat::TORCHSCRIPT, {Backend::POROS}},
{ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
};
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Backend& b);
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d);
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
const ModelFormat& f);
} // namespace fastdeploy
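
Backend, Device and ModelFormat now stream directly into ostreams, so diagnostics no longer need the old Str() helpers. A small usage sketch, assuming the header is installed as fastdeploy/runtime/enum_variables.h:

#include <iostream>
#include <vector>
#include "fastdeploy/runtime/enum_variables.h"

int main() {
  using namespace fastdeploy;
  std::cout << Backend::ORT << " on " << Device::CPU << std::endl;
  // Walk the default backend priority list for Paddle-format models.
  for (const auto& b : s_default_backends_cfg[ModelFormat::PADDLE]) {
    std::cout << b << std::endl;
  }
  return 0;
}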


@@ -0,0 +1,492 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/runtime/runtime.h"
#include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/utils/utils.h"
#ifdef ENABLE_ORT_BACKEND
#include "fastdeploy/backends/ort/ort_backend.h"
#endif
#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#endif
#ifdef ENABLE_PADDLE_BACKEND
#include "fastdeploy/backends/paddle/paddle_backend.h"
#endif
#ifdef ENABLE_POROS_BACKEND
#include "fastdeploy/backends/poros/poros_backend.h"
#endif
#ifdef ENABLE_OPENVINO_BACKEND
#include "fastdeploy/backends/openvino/ov_backend.h"
#endif
#ifdef ENABLE_LITE_BACKEND
#include "fastdeploy/backends/lite/lite_backend.h"
#endif
#ifdef ENABLE_RKNPU2_BACKEND
#include "fastdeploy/backends/rknpu2/rknpu2_backend.h"
#endif
#ifdef ENABLE_SOPHGO_BACKEND
#include "fastdeploy/backends/sophgo/sophgo_backend.h"
#endif
namespace fastdeploy {
bool Runtime::Init(const RuntimeOption& _option) {
option = _option;
// Choose default backend by model format
if (option.backend == Backend::UNKNOWN) {
auto iter = s_default_backends_cfg.find(option.model_format);
if (iter == s_default_backends_cfg.end()) {
FDERROR << "Cannot found a default backend for model format: "
<< option.model_format
<< ", please define the inference backend in RuntimeOption."
<< std::endl;
return false;
}
for (const auto& b : iter->second) {
if (IsBackendAvailable(b)) {
option.backend = b;
FDINFO << "FastDeploy will choose " << b << " to inference this model."
<< std::endl;
}
}
if (option.backend == Backend::UNKNOWN) {
FDERROR << "Cannot found available backends for model format: "
<< option.model_format << "." << std::endl;
return false;
}
}
if (option.backend == Backend::ORT) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::ORT only supports Device::CPU/Device::GPU.");
CreateOrtBackend();
FDINFO << "Runtime initialized with Backend::ORT in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::TRT) {
FDASSERT(option.device == Device::GPU,
"Backend::TRT only supports Device::GPU.");
CreateTrtBackend();
FDINFO << "Runtime initialized with Backend::TRT in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::PDINFER) {
FDASSERT(
option.device == Device::CPU || option.device == Device::GPU ||
option.device == Device::IPU,
"Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
FDASSERT(
option.model_format == ModelFormat::PADDLE,
"Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
CreatePaddleBackend();
FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::POROS) {
FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
"Backend::POROS only supports Device::CPU/Device::GPU.");
FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
"Backend::POROS only supports model format of "
"ModelFormat::TORCHSCRIPT.");
FDINFO << "Runtime initialized with Backend::POROS in " << option.device
<< "." << std::endl;
return true;
} else if (option.backend == Backend::OPENVINO) {
FDASSERT(option.device == Device::CPU,
"Backend::OPENVINO only supports Device::CPU");
CreateOpenVINOBackend();
FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::LITE) {
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
option.device == Device::KUNLUNXIN ||
option.device == Device::ASCEND,
"Backend::LITE only supports "
"Device::CPU/Device::TIMVX/Device::KUNLUNXIN.");
CreateLiteBackend();
FDINFO << "Runtime initialized with Backend::LITE in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::RKNPU2) {
FDASSERT(option.device == Device::RKNPU,
"Backend::RKNPU2 only supports Device::RKNPU2");
CreateRKNPU2Backend();
FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
<< "." << std::endl;
} else if (option.backend == Backend::SOPHGOTPU) {
FDASSERT(option.device == Device::SOPHGOTPUD,
"Backend::SOPHGOTPU only supports Device::SOPHGOTPUD");
CreateSophgoNPUBackend();
FDINFO << "Runtime initialized with Backend::SOPHGOTPU in " << option.device
<< "." << std::endl;
} else {
FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
"backend now."
<< std::endl;
return false;
}
return true;
}
TensorInfo Runtime::GetInputInfo(int index) {
return backend_->GetInputInfo(index);
}
TensorInfo Runtime::GetOutputInfo(int index) {
return backend_->GetOutputInfo(index);
}
std::vector<TensorInfo> Runtime::GetInputInfos() {
return backend_->GetInputInfos();
}
std::vector<TensorInfo> Runtime::GetOutputInfos() {
return backend_->GetOutputInfos();
}
bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors) {
for (auto& tensor : input_tensors) {
FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id,
"Device id of input tensor(%d) and runtime(%d) are not same.",
tensor.device_id, option.device_id);
}
return backend_->Infer(input_tensors, output_tensors);
}
bool Runtime::Infer() {
bool result = backend_->Infer(input_tensors_, &output_tensors_, false);
for (auto& tensor : output_tensors_) {
tensor.device_id = option.device_id;
}
return result;
}
void Runtime::BindInputTensor(const std::string& name, FDTensor& input) {
bool is_exist = false;
for (auto& t : input_tensors_) {
if (t.name == name) {
is_exist = true;
t.SetExternalData(input.shape, input.dtype, input.MutableData(),
input.device, input.device_id);
break;
}
}
if (!is_exist) {
FDTensor new_tensor(name);
new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(),
input.device, input.device_id);
input_tensors_.emplace_back(std::move(new_tensor));
}
}
FDTensor* Runtime::GetOutputTensor(const std::string& name) {
for (auto& t : output_tensors_) {
if (t.name == name) {
return &t;
}
}
FDWARNING << "The output name [" << name << "] don't exist." << std::endl;
return nullptr;
}
void Runtime::CreatePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
auto pd_option = PaddleBackendOption();
pd_option.model_file = option.model_file;
pd_option.params_file = option.params_file;
pd_option.enable_mkldnn = option.pd_enable_mkldnn;
pd_option.enable_log_info = option.pd_enable_log_info;
pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size;
pd_option.use_gpu = (option.device == Device::GPU) ? true : false;
pd_option.use_ipu = (option.device == Device::IPU) ? true : false;
pd_option.gpu_id = option.device_id;
pd_option.delete_pass_names = option.pd_delete_pass_names;
pd_option.cpu_thread_num = option.cpu_thread_num;
pd_option.enable_pinned_memory = option.enable_pinned_memory;
pd_option.external_stream_ = option.external_stream_;
pd_option.model_from_memory_ = option.model_from_memory_;
if (pd_option.model_from_memory_) {
pd_option.model_buffer_ = option.model_buffer_;
pd_option.params_buffer_ = option.params_buffer_;
pd_option.model_buffer_size_ = option.model_buffer_size_;
pd_option.params_buffer_size_ = option.params_buffer_size_;
}
#ifdef ENABLE_TRT_BACKEND
if (pd_option.use_gpu && option.pd_enable_trt) {
pd_option.enable_trt = true;
pd_option.collect_shape = option.pd_collect_shape;
auto trt_option = TrtBackendOption();
trt_option.gpu_id = option.device_id;
trt_option.enable_fp16 = option.trt_enable_fp16;
trt_option.max_batch_size = option.trt_max_batch_size;
trt_option.max_workspace_size = option.trt_max_workspace_size;
trt_option.max_shape = option.trt_max_shape;
trt_option.min_shape = option.trt_min_shape;
trt_option.opt_shape = option.trt_opt_shape;
trt_option.serialize_file = option.trt_serialize_file;
trt_option.enable_pinned_memory = option.enable_pinned_memory;
pd_option.trt_option = trt_option;
pd_option.trt_disabled_ops_ = option.trt_disabled_ops_;
}
#endif
#ifdef WITH_IPU
if (pd_option.use_ipu) {
auto ipu_option = IpuOption();
ipu_option.ipu_device_num = option.ipu_device_num;
ipu_option.ipu_micro_batch_size = option.ipu_micro_batch_size;
ipu_option.ipu_enable_pipelining = option.ipu_enable_pipelining;
ipu_option.ipu_batches_per_step = option.ipu_batches_per_step;
ipu_option.ipu_enable_fp16 = option.ipu_enable_fp16;
ipu_option.ipu_replica_num = option.ipu_replica_num;
ipu_option.ipu_available_memory_proportion =
option.ipu_available_memory_proportion;
ipu_option.ipu_enable_half_partial = option.ipu_enable_half_partial;
pd_option.ipu_option = ipu_option;
}
#endif
FDASSERT(option.model_format == ModelFormat::PADDLE,
"PaddleBackend only support model format of ModelFormat::PADDLE.");
backend_ = utils::make_unique<PaddleBackend>();
auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
if (pd_option.model_from_memory_) {
FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_,
option.params_buffer_, pd_option),
"Load model from Paddle failed while initliazing PaddleBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, pd_option),
"Load model from Paddle failed while initliazing PaddleBackend.");
}
#else
FDASSERT(false,
"PaddleBackend is not available, please compiled with "
"ENABLE_PADDLE_BACKEND=ON.");
#endif
}
void Runtime::CreateOpenVINOBackend() {
#ifdef ENABLE_OPENVINO_BACKEND
auto ov_option = OpenVINOBackendOption();
ov_option.cpu_thread_num = option.cpu_thread_num;
ov_option.device = option.openvino_device;
ov_option.shape_infos = option.ov_shape_infos;
ov_option.num_streams = option.ov_num_streams;
for (const auto& op : option.ov_cpu_operators) {
ov_option.cpu_operators.insert(op);
}
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OpenVINOBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
backend_ = utils::make_unique<OpenVINOBackend>();
auto casted_backend = dynamic_cast<OpenVINOBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ov_option),
"Load model from ONNX failed while initliazing OrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, ov_option),
"Load model from Paddle failed while initliazing OrtBackend.");
}
#else
FDASSERT(false,
"OpenVINOBackend is not available, please compiled with "
"ENABLE_OPENVINO_BACKEND=ON.");
#endif
}
void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
auto ort_option = OrtBackendOption();
ort_option.graph_optimization_level = option.ort_graph_opt_level;
ort_option.intra_op_num_threads = option.cpu_thread_num;
ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
ort_option.execution_mode = option.ort_execution_mode;
ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
ort_option.gpu_id = option.device_id;
ort_option.external_stream_ = option.external_stream_;
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OrtBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
backend_ = utils::make_unique<OrtBackend>();
auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
"Load model from ONNX failed while initliazing OrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, ort_option),
"Load model from Paddle failed while initliazing OrtBackend.");
}
#else
FDASSERT(false,
"OrtBackend is not available, please compiled with "
"ENABLE_ORT_BACKEND=ON.");
#endif
}
void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
auto trt_option = TrtBackendOption();
trt_option.model_file = option.model_file;
trt_option.params_file = option.params_file;
trt_option.model_format = option.model_format;
trt_option.gpu_id = option.device_id;
trt_option.enable_fp16 = option.trt_enable_fp16;
trt_option.enable_int8 = option.trt_enable_int8;
trt_option.max_batch_size = option.trt_max_batch_size;
trt_option.max_workspace_size = option.trt_max_workspace_size;
trt_option.max_shape = option.trt_max_shape;
trt_option.min_shape = option.trt_min_shape;
trt_option.opt_shape = option.trt_opt_shape;
trt_option.serialize_file = option.trt_serialize_file;
trt_option.enable_pinned_memory = option.enable_pinned_memory;
trt_option.external_stream_ = option.external_stream_;
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"TrtBackend only support model format of ModelFormat::PADDLE / "
"ModelFormat::ONNX.");
backend_ = utils::make_unique<TrtBackend>();
auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
if (option.model_format == ModelFormat::ONNX) {
FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
"Load model from ONNX failed while initliazing TrtBackend.");
} else {
FDASSERT(casted_backend->InitFromPaddle(option.model_file,
option.params_file, trt_option),
"Load model from Paddle failed while initliazing TrtBackend.");
}
#else
FDASSERT(false,
"TrtBackend is not available, please compiled with "
"ENABLE_TRT_BACKEND=ON.");
#endif
}
void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
auto lite_option = LiteBackendOption();
lite_option.threads = option.cpu_thread_num;
lite_option.enable_int8 = option.lite_enable_int8;
lite_option.enable_fp16 = option.lite_enable_fp16;
lite_option.power_mode = static_cast<int>(option.lite_power_mode);
lite_option.optimized_model_dir = option.lite_optimized_model_dir;
lite_option.nnadapter_subgraph_partition_config_path =
option.lite_nnadapter_subgraph_partition_config_path;
lite_option.nnadapter_subgraph_partition_config_buffer =
option.lite_nnadapter_subgraph_partition_config_buffer;
lite_option.nnadapter_device_names = option.lite_nnadapter_device_names;
lite_option.nnadapter_context_properties =
option.lite_nnadapter_context_properties;
lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir;
lite_option.nnadapter_dynamic_shape_info =
option.lite_nnadapter_dynamic_shape_info;
lite_option.nnadapter_mixed_precision_quantization_config_path =
option.lite_nnadapter_mixed_precision_quantization_config_path;
lite_option.enable_timvx = option.enable_timvx;
lite_option.enable_ascend = option.enable_ascend;
lite_option.enable_kunlunxin = option.enable_kunlunxin;
lite_option.device_id = option.device_id;
lite_option.kunlunxin_l3_workspace_size = option.kunlunxin_l3_workspace_size;
lite_option.kunlunxin_locked = option.kunlunxin_locked;
lite_option.kunlunxin_autotune = option.kunlunxin_autotune;
lite_option.kunlunxin_autotune_file = option.kunlunxin_autotune_file;
lite_option.kunlunxin_precision = option.kunlunxin_precision;
lite_option.kunlunxin_adaptive_seqlen = option.kunlunxin_adaptive_seqlen;
lite_option.kunlunxin_enable_multi_stream =
option.kunlunxin_enable_multi_stream;
FDASSERT(option.model_format == ModelFormat::PADDLE,
"LiteBackend only support model format of ModelFormat::PADDLE");
backend_ = utils::make_unique<LiteBackend>();
auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
lite_option),
"Load model from nb file failed while initializing LiteBackend.");
#else
FDASSERT(false,
"LiteBackend is not available, please compiled with "
"ENABLE_LITE_BACKEND=ON.");
#endif
}
void Runtime::CreateRKNPU2Backend() {
#ifdef ENABLE_RKNPU2_BACKEND
auto rknpu2_option = RKNPU2BackendOption();
rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
rknpu2_option.core_mask = option.rknpu2_core_mask_;
FDASSERT(option.model_format == ModelFormat::RKNN,
"RKNPU2Backend only support model format of ModelFormat::RKNN");
backend_ = utils::make_unique<RKNPU2Backend>();
auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
"Load model from nb file failed while initializing LiteBackend.");
#else
FDASSERT(false,
"RKNPU2Backend is not available, please compiled with "
"ENABLE_RKNPU2_BACKEND=ON.");
#endif
}
void Runtime::CreateSophgoNPUBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
auto sophgo_option = SophgoBackendOption();
FDASSERT(option.model_format == ModelFormat::SOPHGO,
"SophgoBackend only support model format of ModelFormat::SOPHGO");
backend_ = utils::make_unique<SophgoBackend>();
auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get());
FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option),
"Load model from nb file failed while initializing LiteBackend.");
#else
FDASSERT(false,
"SophgoBackend is not available, please compiled with "
"ENABLE_SOPHGO_BACKEND=ON.");
#endif
}
Runtime* Runtime::Clone(void* stream, int device_id) {
Runtime* runtime = new Runtime();
if (option.backend != Backend::OPENVINO &&
option.backend != Backend::PDINFER && option.backend != Backend::TRT) {
runtime->Init(option);
FDWARNING << "Only OpenVINO/Paddle Inference/TensorRT support \
clone engine to reduce CPU/GPU memory usage now. For "
<< option.backend
<< ", FastDeploy will create a new engine which \
will not share memory with the current runtime."
<< std::endl;
return runtime;
}
FDINFO << "Runtime Clone with Backend:: " << option.backend << " in "
<< option.device << "." << std::endl;
runtime->option = option;
runtime->backend_ = backend_->Clone(stream, device_id);
return runtime;
}
} // namespace fastdeploy
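The Clone path above shares a compiled engine only for the OpenVINO, Paddle Inference, and TensorRT backends; any other backend gets a freshly built engine. A minimal sketch of the intended multi-stream usage, assuming a GPU build with TensorRT and placeholder model paths:

#include <cuda_runtime_api.h>
#include <memory>

#include "fastdeploy/runtime/runtime.h"

void CloneExample() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // placeholder paths
  option.UseGpu(0);
  option.UseTrtBackend();

  fastdeploy::Runtime runtime;
  runtime.Init(option);

  // The clone shares the compiled engine (and its weights) with `runtime`;
  // only per-instance buffers are duplicated.
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  std::unique_ptr<fastdeploy::Runtime> worker(
      runtime.Clone(static_cast<void*>(stream), /*device_id=*/0));
}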

fastdeploy/runtime/runtime.h Executable file

@@ -0,0 +1,109 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*! \file runtime.h
    \brief Declaration of the Runtime class, the unified inference interface
    over all supported backends.
 */
#pragma once
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/runtime/runtime_option.h"
#include "fastdeploy/utils/perf.h"
/** \brief All C++ FastDeploy APIs are defined inside this namespace
*
*/
namespace fastdeploy {
/*! @brief Runtime object used to run inference on the loaded model across different devices
*/
struct FASTDEPLOY_DECL Runtime {
public:
  /// Initialize a Runtime object with RuntimeOption
bool Init(const RuntimeOption& _option);
  /** \brief Run inference on the model with the input data, and write the results to the output
   *
   * \param[in] input_tensors Note that each FDTensor::name must match the corresponding input name of the model
   * \param[out] output_tensors Inference results
   * \return true if the inference succeeded, otherwise false
*/
bool Infer(std::vector<FDTensor>& input_tensors,
std::vector<FDTensor>* output_tensors);
  /** \brief Run inference on the model without passing inputs/outputs explicitly.
   *
   * The input and output data must be bound beforehand through the BindInputTensor and GetOutputTensor interfaces.
*/
bool Infer();
/** \brief Compile TorchScript Module, only for Poros backend
*
   * \param[in] prewarm_tensors Prewarm data for compilation
   * \param[in] _option Runtime option
   * \return true if the compilation succeeded, otherwise false
*/
bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option);
/** \brief Get number of inputs
*/
int NumInputs() { return backend_->NumInputs(); }
/** \brief Get number of outputs
*/
int NumOutputs() { return backend_->NumOutputs(); }
/** \brief Get input information by index
*/
TensorInfo GetInputInfo(int index);
/** \brief Get output information by index
*/
TensorInfo GetOutputInfo(int index);
/** \brief Get all the input information
*/
std::vector<TensorInfo> GetInputInfos();
/** \brief Get all the output information
*/
std::vector<TensorInfo> GetOutputInfos();
/** \brief Bind FDTensor by name, no copy and share input memory
*/
void BindInputTensor(const std::string& name, FDTensor& input);
/** \brief Get output FDTensor by name, no copy and share backend output memory
*/
FDTensor* GetOutputTensor(const std::string& name);
/** \brief Clone new Runtime when multiple instances of the same model are created
*
   * \param[in] stream CUDA Stream, default param is nullptr
   * \return a new Runtime* produced by this clone
*/
Runtime* Clone(void* stream = nullptr, int device_id = -1);
RuntimeOption option;
private:
void CreateOrtBackend();
void CreatePaddleBackend();
void CreateTrtBackend();
void CreateOpenVINOBackend();
void CreateLiteBackend();
void CreateRKNPU2Backend();
void CreateSophgoNPUBackend();
std::unique_ptr<BaseBackend> backend_;
std::vector<FDTensor> input_tensors_;
std::vector<FDTensor> output_tensors_;
};
} // namespace fastdeploy
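For reference, a hedged end-to-end sketch of the API declared above; the input name "x" and the 1x3x224x224 shape are assumptions that must match the actual model:

#include <vector>

#include "fastdeploy/runtime/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);
  option.UseCpu();

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return -1;

  // FDTensor::name must match the model's input name ("x" is an assumption).
  std::vector<float> data(1 * 3 * 224 * 224, 0.0f);
  std::vector<fastdeploy::FDTensor> inputs(1);
  inputs[0].SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
                            data.data());
  inputs[0].name = "x";

  std::vector<fastdeploy::FDTensor> outputs;
  if (!runtime.Infer(inputs, &outputs)) return -1;
  return 0;
}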


@@ -0,0 +1,515 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/runtime/runtime.h"
#include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/utils/utils.h"
namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() {
std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
backends.push_back(Backend::TRT);
#endif
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
#endif
#ifdef ENABLE_POROS_BACKEND
backends.push_back(Backend::POROS);
#endif
#ifdef ENABLE_OPENVINO_BACKEND
backends.push_back(Backend::OPENVINO);
#endif
#ifdef ENABLE_LITE_BACKEND
backends.push_back(Backend::LITE);
#endif
#ifdef ENABLE_RKNPU2_BACKEND
backends.push_back(Backend::RKNPU2);
#endif
#ifdef ENABLE_SOPHGO_BACKEND
backends.push_back(Backend::SOPHGOTPU);
#endif
return backends;
}
bool IsBackendAvailable(const Backend& backend) {
std::vector<Backend> backends = GetAvailableBackends();
for (size_t i = 0; i < backends.size(); ++i) {
if (backend == backends[i]) {
return true;
}
}
return false;
}
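// A small usage sketch (not part of the original file): prefer TensorRT when
// it was compiled in, otherwise fall back to ONNX Runtime.
//
//   fastdeploy::RuntimeOption option;
//   if (fastdeploy::IsBackendAvailable(fastdeploy::Backend::TRT)) {
//     option.UseTrtBackend();
//   } else if (fastdeploy::IsBackendAvailable(fastdeploy::Backend::ORT)) {
//     option.UseOrtBackend();
//   }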
bool CheckModelFormat(const std::string& model_file,
const ModelFormat& model_format) {
if (model_format == ModelFormat::PADDLE) {
if (model_file.size() < 8 ||
model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
FDERROR << "With model format of ModelFormat::PADDLE, the model file "
"should ends with `.pdmodel`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::ONNX) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".onnx") {
FDERROR << "With model format of ModelFormat::ONNX, the model file "
"should ends with `.onnx`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::RKNN) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".rknn") {
FDERROR << "With model format of ModelFormat::RKNN, the model file "
"should ends with `.rknn`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::TORCHSCRIPT) {
if (model_file.size() < 3 ||
model_file.substr(model_file.size() - 3, 3) != ".pt") {
FDERROR
<< "With model format of ModelFormat::TORCHSCRIPT, the model file "
"should ends with `.pt`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::SOPHGO) {
if (model_file.size() < 7 ||
model_file.substr(model_file.size() - 7, 7) != ".bmodel") {
FDERROR << "With model format of ModelFormat::SOPHGO, the model file "
"should ends with `.bmodel`, but now it's "
<< model_file << std::endl;
return false;
}
} else {
FDERROR
<< "Only support model format with frontend ModelFormat::PADDLE / "
"ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT."
<< std::endl;
return false;
}
return true;
}
ModelFormat GuessModelFormat(const std::string& model_file) {
if (model_file.size() > 8 &&
model_file.substr(model_file.size() - 8, 8) == ".pdmodel") {
FDINFO << "Model Format: PaddlePaddle." << std::endl;
return ModelFormat::PADDLE;
} else if (model_file.size() > 5 &&
model_file.substr(model_file.size() - 5, 5) == ".onnx") {
FDINFO << "Model Format: ONNX." << std::endl;
return ModelFormat::ONNX;
} else if (model_file.size() > 3 &&
model_file.substr(model_file.size() - 3, 3) == ".pt") {
FDINFO << "Model Format: Torchscript." << std::endl;
return ModelFormat::TORCHSCRIPT;
} else if (model_file.size() > 5 &&
model_file.substr(model_file.size() - 5, 5) == ".rknn") {
FDINFO << "Model Format: RKNN." << std::endl;
return ModelFormat::RKNN;
} else if (model_file.size() > 7 &&
model_file.substr(model_file.size() - 7, 7) == ".bmodel") {
FDINFO << "Model Format: SOPHGO." << std::endl;
return ModelFormat::SOPHGO;
}
FDERROR << "Cannot guess which model format you are using, please set "
"RuntimeOption::model_format manually."
<< std::endl;
return ModelFormat::PADDLE;
}
void RuntimeOption::SetModelPath(const std::string& model_path,
const std::string& params_path,
const ModelFormat& format) {
if (format == ModelFormat::PADDLE) {
model_file = model_path;
params_file = params_path;
model_format = ModelFormat::PADDLE;
} else if (format == ModelFormat::ONNX) {
model_file = model_path;
model_format = ModelFormat::ONNX;
} else if (format == ModelFormat::TORCHSCRIPT) {
model_file = model_path;
model_format = ModelFormat::TORCHSCRIPT;
} else {
FDASSERT(false,
"The model format only can be "
"ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
}
}
void RuntimeOption::SetModelBuffer(const char* model_buffer,
size_t model_buffer_size,
const char* params_buffer,
size_t params_buffer_size,
const ModelFormat& format) {
model_buffer_size_ = model_buffer_size;
params_buffer_size_ = params_buffer_size;
model_from_memory_ = true;
if (format == ModelFormat::PADDLE) {
model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
params_buffer_ =
std::string(params_buffer, params_buffer + params_buffer_size);
model_format = ModelFormat::PADDLE;
} else if (format == ModelFormat::ONNX) {
model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
model_format = ModelFormat::ONNX;
} else if (format == ModelFormat::TORCHSCRIPT) {
model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
model_format = ModelFormat::TORCHSCRIPT;
} else {
FDASSERT(false,
"The model format only can be "
"ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
}
}
void RuntimeOption::UseGpu(int gpu_id) {
#ifdef WITH_GPU
device = Device::GPU;
device_id = gpu_id;
#else
FDWARNING << "The FastDeploy didn't compile with GPU, will force to use CPU."
<< std::endl;
device = Device::CPU;
#endif
}
void RuntimeOption::UseCpu() { device = Device::CPU; }
void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
fastdeploy::rknpu2::CoreMask rknpu2_core) {
rknpu2_cpu_name_ = rknpu2_name;
rknpu2_core_mask_ = rknpu2_core;
device = Device::RKNPU;
}
void RuntimeOption::UseTimVX() {
enable_timvx = true;
device = Device::TIMVX;
}
void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
bool locked, bool autotune,
const std::string& autotune_file,
const std::string& precision,
bool adaptive_seqlen,
bool enable_multi_stream) {
enable_kunlunxin = true;
device_id = kunlunxin_id;
kunlunxin_l3_workspace_size = l3_workspace_size;
kunlunxin_locked = locked;
kunlunxin_autotune = autotune;
kunlunxin_autotune_file = autotune_file;
kunlunxin_precision = precision;
kunlunxin_adaptive_seqlen = adaptive_seqlen;
kunlunxin_enable_multi_stream = enable_multi_stream;
device = Device::KUNLUNXIN;
}
void RuntimeOption::UseAscend() {
enable_ascend = true;
device = Device::ASCEND;
}
void RuntimeOption::UseSophgo() {
device = Device::SOPHGOTPUD;
UseSophgoBackend();
}
void RuntimeOption::SetExternalStream(void* external_stream) {
external_stream_ = external_stream;
}
void RuntimeOption::SetCpuThreadNum(int thread_num) {
FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
cpu_thread_num = thread_num;
}
void RuntimeOption::SetOrtGraphOptLevel(int level) {
std::vector<int> supported_level{-1, 0, 1, 2};
auto valid_level = std::find(supported_level.begin(), supported_level.end(),
level) != supported_level.end();
FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
ort_graph_opt_level = level;
}
// use paddle inference backend
void RuntimeOption::UsePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
backend = Backend::PDINFER;
#else
FDASSERT(false, "The FastDeploy didn't compile with Paddle Inference.");
#endif
}
// use onnxruntime backend
void RuntimeOption::UseOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
backend = Backend::ORT;
#else
FDASSERT(false, "The FastDeploy didn't compile with OrtBackend.");
#endif
}
// use sophgoruntime backend
void RuntimeOption::UseSophgoBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
backend = Backend::SOPHGOTPU;
#else
FDASSERT(false, "The FastDeploy didn't compile with SophgoBackend.");
#endif
}
// use poros backend
void RuntimeOption::UsePorosBackend() {
#ifdef ENABLE_POROS_BACKEND
backend = Backend::POROS;
#else
FDASSERT(false, "The FastDeploy didn't compile with PorosBackend.");
#endif
}
void RuntimeOption::UseTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
backend = Backend::TRT;
#else
FDASSERT(false, "The FastDeploy didn't compile with TrtBackend.");
#endif
}
void RuntimeOption::UseOpenVINOBackend() {
#ifdef ENABLE_OPENVINO_BACKEND
backend = Backend::OPENVINO;
#else
FDASSERT(false, "The FastDeploy didn't compile with OpenVINO.");
#endif
}
void RuntimeOption::UseLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
backend = Backend::LITE;
#else
FDASSERT(false, "The FastDeploy didn't compile with Paddle Lite.");
#endif
}
void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) {
pd_enable_mkldnn = pd_mkldnn;
}
void RuntimeOption::DeletePaddleBackendPass(const std::string& pass_name) {
pd_delete_pass_names.push_back(pass_name);
}
void RuntimeOption::EnablePaddleLogInfo() { pd_enable_log_info = true; }
void RuntimeOption::DisablePaddleLogInfo() { pd_enable_log_info = false; }
void RuntimeOption::EnablePaddleToTrt() {
FDASSERT(backend == Backend::TRT,
"Should call UseTrtBackend() before call EnablePaddleToTrt().");
#ifdef ENABLE_PADDLE_BACKEND
FDINFO << "While using TrtBackend with EnablePaddleToTrt, FastDeploy will "
"change to use Paddle Inference Backend."
<< std::endl;
backend = Backend::PDINFER;
pd_enable_trt = true;
#else
FDASSERT(false,
"While using TrtBackend with EnablePaddleToTrt, require the "
"FastDeploy is compiled with Paddle Inference Backend, "
"please rebuild your FastDeploy.");
#endif
}
void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) {
FDASSERT(size > 0, "Parameter size must greater than 0.");
pd_mkldnn_cache_size = size;
}
void RuntimeOption::SetOpenVINODevice(const std::string& name) {
openvino_device = name;
}
void RuntimeOption::EnableLiteFP16() { lite_enable_fp16 = true; }
void RuntimeOption::DisableLiteFP16() { lite_enable_fp16 = false; }
void RuntimeOption::EnableLiteInt8() { lite_enable_int8 = true; }
void RuntimeOption::DisableLiteInt8() { lite_enable_int8 = false; }
void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
lite_power_mode = mode;
}
void RuntimeOption::SetLiteOptimizedModelDir(
const std::string& optimized_model_dir) {
lite_optimized_model_dir = optimized_model_dir;
}
void RuntimeOption::SetLiteSubgraphPartitionPath(
const std::string& nnadapter_subgraph_partition_config_path) {
lite_nnadapter_subgraph_partition_config_path =
nnadapter_subgraph_partition_config_path;
}
void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer(
const std::string& nnadapter_subgraph_partition_config_buffer) {
lite_nnadapter_subgraph_partition_config_buffer =
nnadapter_subgraph_partition_config_buffer;
}
void RuntimeOption::SetLiteDeviceNames(
const std::vector<std::string>& nnadapter_device_names) {
lite_nnadapter_device_names = nnadapter_device_names;
}
void RuntimeOption::SetLiteContextProperties(
const std::string& nnadapter_context_properties) {
lite_nnadapter_context_properties = nnadapter_context_properties;
}
void RuntimeOption::SetLiteModelCacheDir(
const std::string& nnadapter_model_cache_dir) {
lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir;
}
void RuntimeOption::SetLiteDynamicShapeInfo(
const std::map<std::string, std::vector<std::vector<int64_t>>>&
nnadapter_dynamic_shape_info) {
lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info;
}
void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath(
const std::string& nnadapter_mixed_precision_quantization_config_path) {
lite_nnadapter_mixed_precision_quantization_config_path =
nnadapter_mixed_precision_quantization_config_path;
}
void RuntimeOption::SetTrtInputShape(const std::string& input_name,
const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape,
const std::vector<int32_t>& max_shape) {
trt_min_shape[input_name].clear();
trt_max_shape[input_name].clear();
trt_opt_shape[input_name].clear();
trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end());
if (opt_shape.size() == 0) {
trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end());
} else {
trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end());
}
if (max_shape.size() == 0) {
trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end());
} else {
trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end());
}
}
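// Usage sketch (hypothetical tensor name and shapes): declare a dynamic
// "image" input so the TensorRT engine covers batch sizes 1 through 8;
// as implemented above, omitting opt_shape/max_shape pins them to min_shape.
//
//   option.SetTrtInputShape("image", {1, 3, 224, 224},  // min_shape
//                           {4, 3, 224, 224},           // opt_shape
//                           {8, 3, 224, 224});          // max_shape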
void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) {
trt_max_workspace_size = max_workspace_size;
}
void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size) {
trt_max_batch_size = max_batch_size;
}
void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }
void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; }
void RuntimeOption::EnablePinnedMemory() { enable_pinned_memory = true; }
void RuntimeOption::DisablePinnedMemory() { enable_pinned_memory = false; }
void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
trt_serialize_file = cache_file_path;
}
void RuntimeOption::SetOpenVINOStreams(int num_streams) {
ov_num_streams = num_streams;
}
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
option = _option;
auto poros_option = PorosBackendOption();
poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
poros_option.gpu_id = option.device_id;
poros_option.long_to_int = option.long_to_int;
poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
poros_option.unconst_ops_thres = option.unconst_ops_thres;
poros_option.poros_file = option.poros_file;
poros_option.is_dynamic = option.is_dynamic;
poros_option.enable_fp16 = option.trt_enable_fp16;
poros_option.max_batch_size = option.trt_max_batch_size;
poros_option.max_workspace_size = option.trt_max_workspace_size;
FDASSERT(
option.model_format == ModelFormat::TORCHSCRIPT,
"PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
backend_ = utils::make_unique<PorosBackend>();
auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
FDASSERT(
casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
"Load model from Torchscript failed while initliazing PorosBackend.");
#else
FDASSERT(false,
"PorosBackend is not available, please compiled with "
"ENABLE_POROS_BACKEND=ON.");
#endif
return true;
}
void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; }
void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; }
void RuntimeOption::DisablePaddleTrtOPs(const std::vector<std::string>& ops) {
trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
}
void RuntimeOption::UseIpu(int device_num, int micro_batch_size,
bool enable_pipelining, int batches_per_step) {
#ifdef WITH_IPU
device = Device::IPU;
ipu_device_num = device_num;
ipu_micro_batch_size = micro_batch_size;
ipu_enable_pipelining = enable_pipelining;
ipu_batches_per_step = batches_per_step;
#else
FDWARNING << "The FastDeploy didn't compile with IPU, will force to use CPU."
<< std::endl;
device = Device::CPU;
#endif
}
void RuntimeOption::SetIpuConfig(bool enable_fp16, int replica_num,
float available_memory_proportion,
bool enable_half_partial) {
ipu_enable_fp16 = enable_fp16;
ipu_replica_num = replica_num;
ipu_available_memory_proportion = available_memory_proportion;
ipu_enable_half_partial = enable_half_partial;
}
} // namespace fastdeploy
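As a usage note, the setters defined above are meant to be composed on a single RuntimeOption before Runtime::Init; a hedged sketch with placeholder file names:

fastdeploy::RuntimeOption option;
option.SetModelPath("model.pdmodel", "model.pdiparams");  // placeholder paths
option.UseGpu(0);
option.UseTrtBackend();
option.EnableTrtFP16();                      // falls back to FP32 on GPUs without FP16
option.SetTrtCacheFile("trt_cache.engine");  // skip the engine rebuild on later runs
option.EnablePinnedMemory();                 // faster host<->device transfers

fastdeploy::Runtime runtime;
runtime.Init(option);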


@@ -0,0 +1,482 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*! \file runtime_option.h
    \brief Declaration of RuntimeOption, the option set used to configure a
    Runtime before inference.
 */
#pragma once
#include <algorithm>
#include <map>
#include <vector>
#include "fastdeploy/runtime/enum_variables.h"
#include "fastdeploy/backends/lite/option.h"
#include "fastdeploy/backends/openvino/option.h"
#include "fastdeploy/backends/ort/option.h"
#include "fastdeploy/backends/paddle/option.h"
#include "fastdeploy/backends/poros/option.h"
#include "fastdeploy/backends/rknpu2/option.h"
#include "fastdeploy/backends/sophgo/option.h"
#include "fastdeploy/backends/tensorrt/option.h"
namespace fastdeploy {
/**
 * @brief Get all the available inference backends in FastDeploy
*/
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
/**
 * @brief Check if the given inference backend is available
*/
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
bool CheckModelFormat(const std::string& model_file,
const ModelFormat& model_format);
ModelFormat GuessModelFormat(const std::string& model_file);
/*! @brief Option object used when creating a new Runtime object
*/
struct FASTDEPLOY_DECL RuntimeOption {
/** \brief Set path of model file and parameter file
*
   * \param[in] model_path Path of the model file, e.g. ResNet50/model.pdmodel for a Paddle format model / ResNet50/model.onnx for an ONNX format model
   * \param[in] params_path Path of the parameter file; only used when the model format is Paddle, e.g. ResNet50/model.pdiparams
* \param[in] format Format of the loaded model
*/
void SetModelPath(const std::string& model_path,
const std::string& params_path = "",
const ModelFormat& format = ModelFormat::PADDLE);
/** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
*
* \param[in] model_buffer The memory buffer of model
* \param[in] model_buffer_size The size of the model data
* \param[in] params_buffer The memory buffer of the combined parameters file
* \param[in] params_buffer_size The size of the combined parameters data
* \param[in] format Format of the loaded model
*/
void SetModelBuffer(const char* model_buffer, size_t model_buffer_size,
const char* params_buffer, size_t params_buffer_size,
const ModelFormat& format = ModelFormat::PADDLE);
  /// Use CPU for inference; the runtime will run on CPU by default
void UseCpu();
  /// Use NVIDIA GPU for inference
void UseGpu(int gpu_id = 0);
  /// Use RKNPU2 for inference
  void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name =
fastdeploy::rknpu2::CpuName::RK3588,
fastdeploy::rknpu2::CoreMask rknpu2_core =
fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
  /// Use TimVX for inference
void UseTimVX();
  /// Use Huawei Ascend for inference
void UseAscend();
///
/// \brief Turn on KunlunXin XPU.
///
/// \param kunlunxin_id the KunlunXin XPU card to use (default is 0).
/// \param l3_workspace_size The size of the video memory allocated by the l3
/// cache, the maximum is 16M.
/// \param locked Whether the allocated L3 cache can be locked. If false,
/// it means that the L3 cache is not locked, and the allocated L3
/// cache can be shared by multiple models, and multiple models
/// sharing the L3 cache will be executed sequentially on the card.
/// \param autotune Whether to autotune the conv operator in the model. If
/// true, when the conv operator of a certain dimension is executed
/// for the first time, it will automatically search for a better
/// algorithm to improve the performance of subsequent conv operators
/// of the same dimension.
/// \param autotune_file Specify the path of the autotune file. If
/// autotune_file is specified, the algorithm specified in the
/// file will be used and autotune will not be performed again.
/// \param precision Calculation accuracy of multi_encoder
  /// \param adaptive_seqlen Whether the input of multi_encoder is variable length
/// \param enable_multi_stream Whether to enable the multi stream of
/// KunlunXin XPU.
///
void UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00,
bool locked = false, bool autotune = true,
const std::string& autotune_file = "",
const std::string& precision = "int16",
bool adaptive_seqlen = false,
bool enable_multi_stream = false);
  /// Use Sophgo for inference
void UseSophgo();
  /// Set an external stream (e.g. a CUDA stream) used by the runtime
  void SetExternalStream(void* external_stream);
  /**
   * @brief Set the number of CPU threads used while inferencing on CPU; by default it is decided by each backend
   */
void SetCpuThreadNum(int thread_num);
  /// Set ORT graph opt level, the default is decided by ONNX Runtime itself
void SetOrtGraphOptLevel(int level = -1);
/// Set Paddle Inference as inference backend, support CPU/GPU
void UsePaddleBackend();
/// Wrapper function of UsePaddleBackend()
void UsePaddleInferBackend() { return UsePaddleBackend(); }
/// Set ONNX Runtime as inference backend, support CPU/GPU
void UseOrtBackend();
  /// Set SOPHGO Runtime as inference backend, only support Sophgo TPU
void UseSophgoBackend();
/// Set TensorRT as inference backend, only support GPU
void UseTrtBackend();
/// Set Poros backend as inference backend, support CPU/GPU
void UsePorosBackend();
/// Set OpenVINO as inference backend, only support CPU
void UseOpenVINOBackend();
  /// Set Paddle Lite as inference backend, only support ARM CPU
void UseLiteBackend();
/// Wrapper function of UseLiteBackend()
void UsePaddleLiteBackend() { return UseLiteBackend(); }
/// Set mkldnn switch while using Paddle Inference as inference backend
void SetPaddleMKLDNN(bool pd_mkldnn = true);
  /**
* @brief If TensorRT backend is used, EnablePaddleToTrt will change to use Paddle Inference backend, and use its integrated TensorRT instead.
*/
void EnablePaddleToTrt();
/**
* @brief Delete pass by name while using Paddle Inference as inference backend, this can be called multiple times to delete a set of passes
*/
void DeletePaddleBackendPass(const std::string& delete_pass_name);
/**
* @brief Enable print debug information while using Paddle Inference as inference backend, the backend disable the debug information by default
*/
void EnablePaddleLogInfo();
/**
* @brief Disable print debug information while using Paddle Inference as inference backend
*/
void DisablePaddleLogInfo();
/**
   * @brief Set shape cache size while using Paddle Inference with mkldnn, by default it will cache all the different shapes
*/
void SetPaddleMKLDNNCacheSize(int size);
/**
* @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'....
*/
void SetOpenVINODevice(const std::string& name = "CPU");
/**
* @brief Set shape info for OpenVINO
*/
void SetOpenVINOShapeInfo(
const std::map<std::string, std::vector<int64_t>>& shape_info) {
ov_shape_infos = shape_info;
}
/**
   * @brief While using the OpenVINO backend with an Intel GPU, use this interface to specify operators to run on CPU
*/
void SetOpenVINOCpuOperators(const std::vector<std::string>& operators) {
ov_cpu_operators = operators;
}
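  // Usage sketch (hypothetical operator name): keep NonMaxSuppression on the
  // CPU while the rest of the graph runs on an Intel GPU.
  //
  //   option.SetOpenVINODevice("GPU");
  //   option.SetOpenVINOCpuOperators({"NonMaxSuppression"});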
/**
   * @brief Set optimized model dir for Paddle Lite backend.
*/
void SetLiteOptimizedModelDir(const std::string& optimized_model_dir);
/**
* @brief Set subgraph partition path for Paddle Lite backend.
*/
void SetLiteSubgraphPartitionPath(
const std::string& nnadapter_subgraph_partition_config_path);
/**
   * @brief Set subgraph partition config buffer for Paddle Lite backend.
*/
void SetLiteSubgraphPartitionConfigBuffer(
const std::string& nnadapter_subgraph_partition_config_buffer);
/**
* @brief Set device name for Paddle Lite backend.
*/
void
SetLiteDeviceNames(const std::vector<std::string>& nnadapter_device_names);
/**
* @brief Set context properties for Paddle Lite backend.
*/
void
SetLiteContextProperties(const std::string& nnadapter_context_properties);
/**
* @brief Set model cache dir for Paddle Lite backend.
*/
void SetLiteModelCacheDir(const std::string& nnadapter_model_cache_dir);
/**
* @brief Set dynamic shape info for Paddle Lite backend.
*/
void SetLiteDynamicShapeInfo(
const std::map<std::string, std::vector<std::vector<int64_t>>>&
nnadapter_dynamic_shape_info);
/**
* @brief Set mixed precision quantization config path for Paddle Lite backend.
*/
void SetLiteMixedPrecisionQuantizationConfigPath(
const std::string& nnadapter_mixed_precision_quantization_config_path);
  /**
   * @brief enable half precision while using Paddle Lite backend
   */
  void EnableLiteFP16();
  /**
   * @brief disable half precision, change back to full precision (float32)
   */
  void DisableLiteFP16();
  /**
   * @brief enable int8 precision while using Paddle Lite backend
   */
  void EnableLiteInt8();
  /**
   * @brief disable int8 precision, change back to full precision (float32)
   */
  void DisableLiteInt8();
/**
* @brief Set power mode while using Paddle Lite as inference backend, mode(0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND, 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW, refer [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details)
*/
void SetLitePowerMode(LitePowerMode mode);
/** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend
*
* \param[in] input_name The name of input for the model which is dynamic shape
* \param[in] min_shape The minimal shape for the input tensor
   * \param[in] opt_shape The optimized shape for the input tensor, just set the most common shape; if set as the default value, it will remain the same as min_shape
   * \param[in] max_shape The maximum shape for the input tensor; if set as the default value, it will remain the same as min_shape
*/
void SetTrtInputShape(
const std::string& input_name, const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
const std::vector<int32_t>& max_shape = std::vector<int32_t>());
/// Set max_workspace_size for TensorRT, default 1<<30
void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
/// Set max_batch_size for TensorRT, default 32
void SetTrtMaxBatchSize(size_t max_batch_size);
/**
   * @brief Enable FP16 inference while using TensorRT backend. Notice: not all GPU devices support FP16; on devices that don't, FastDeploy will fall back to FP32 automatically
*/
void EnableTrtFP16();
/// Disable FP16 inference while using TensorRT backend
void DisableTrtFP16();
/**
   * @brief Set cache file path while using TensorRT backend. Loading a Paddle/ONNX model and initializing TensorRT can take a long time; with this interface the TensorRT engine will be saved to `cache_file_path` and loaded directly the next time the code runs
*/
void SetTrtCacheFile(const std::string& cache_file_path);
/**
   * @brief Enable pinned memory. Pinned memory can be utilized to speed up the data transfer between CPU and GPU. Currently it's only supported in the TRT backend and the Paddle Inference backend.
*/
void EnablePinnedMemory();
/**
* @brief Disable pinned memory
*/
void DisablePinnedMemory();
/**
   * @brief Enable collecting input shapes in the Paddle-TRT backend
*/
void EnablePaddleTrtCollectShape();
/**
   * @brief Disable collecting input shapes in the Paddle-TRT backend
*/
void DisablePaddleTrtCollectShape();
/**
   * @brief Prevent the listed ops from running in the Paddle-TRT backend
*/
void DisablePaddleTrtOPs(const std::vector<std::string>& ops);
  /**
   * @brief Set the number of streams used by the OpenVINO backend
   */
void SetOpenVINOStreams(int num_streams);
  /** \brief Use Graphcore IPU for inference.
*
* \param[in] device_num the number of IPUs.
   * \param[in] micro_batch_size the batch size in the graph, only works when the graph has no batch shape info.
* \param[in] enable_pipelining enable pipelining.
* \param[in] batches_per_step the number of batches per run in pipelining.
*/
void UseIpu(int device_num = 1, int micro_batch_size = 1,
bool enable_pipelining = false, int batches_per_step = 1);
/** \brief Set IPU config.
*
* \param[in] enable_fp16 enable fp16.
* \param[in] replica_num the number of graph replication.
* \param[in] available_memory_proportion the available memory proportion for matmul/conv.
   * \param[in] enable_half_partial enable fp16 partial for matmul, only works with fp16.
*/
void SetIpuConfig(bool enable_fp16 = false, int replica_num = 1,
float available_memory_proportion = 1.0,
bool enable_half_partial = false);
Backend backend = Backend::UNKNOWN;
  // for CPU inference and preprocessing
  // the default value -1 lets each backend choose its own thread number
int cpu_thread_num = -1;
int device_id = 0;
Device device = Device::CPU;
void* external_stream_ = nullptr;
bool enable_pinned_memory = false;
// ======Only for ORT Backend========
// -1 means use default value by ort
// 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
// ORT_ENABLE_ALL
int ort_graph_opt_level = -1;
int ort_inter_op_num_threads = -1;
// 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
int ort_execution_mode = -1;
// ======Only for Paddle Backend=====
bool pd_enable_mkldnn = true;
bool pd_enable_log_info = false;
bool pd_enable_trt = false;
bool pd_collect_shape = false;
int pd_mkldnn_cache_size = 1;
std::vector<std::string> pd_delete_pass_names;
// ======Only for Paddle IPU Backend =======
int ipu_device_num = 1;
int ipu_micro_batch_size = 1;
bool ipu_enable_pipelining = false;
int ipu_batches_per_step = 1;
bool ipu_enable_fp16 = false;
int ipu_replica_num = 1;
float ipu_available_memory_proportion = 1.0;
bool ipu_enable_half_partial = false;
// ======Only for Paddle Lite Backend=====
// 0: LITE_POWER_HIGH 1: LITE_POWER_LOW 2: LITE_POWER_FULL
// 3: LITE_POWER_NO_BIND 4: LITE_POWER_RAND_HIGH
// 5: LITE_POWER_RAND_LOW
LitePowerMode lite_power_mode = LitePowerMode::LITE_POWER_NO_BIND;
// enable int8 or not
bool lite_enable_int8 = false;
// enable fp16 or not
bool lite_enable_fp16 = false;
// optimized model dir for CxxConfig
std::string lite_optimized_model_dir = "";
std::string lite_nnadapter_subgraph_partition_config_path = "";
// and other nnadapter settings for CxxConfig
std::string lite_nnadapter_subgraph_partition_config_buffer = "";
std::string lite_nnadapter_context_properties = "";
std::string lite_nnadapter_model_cache_dir = "";
std::string lite_nnadapter_mixed_precision_quantization_config_path = "";
std::map<std::string, std::vector<std::vector<int64_t>>>
lite_nnadapter_dynamic_shape_info = {{"", {{0}}}};
std::vector<std::string> lite_nnadapter_device_names = {};
bool enable_timvx = false;
bool enable_ascend = false;
bool enable_kunlunxin = false;
// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_max_shape;
std::map<std::string, std::vector<int32_t>> trt_min_shape;
std::map<std::string, std::vector<int32_t>> trt_opt_shape;
std::string trt_serialize_file = "";
bool trt_enable_fp16 = false;
bool trt_enable_int8 = false;
size_t trt_max_batch_size = 1;
size_t trt_max_workspace_size = 1 << 30;
// ======Only for PaddleTrt Backend=======
std::vector<std::string> trt_disabled_ops_{};
// ======Only for Poros Backend=======
bool is_dynamic = false;
bool long_to_int = true;
bool use_nvidia_tf32 = false;
int unconst_ops_thres = -1;
std::string poros_file = "";
// ======Only for OpenVINO Backend=======
int ov_num_streams = 0;
std::string openvino_device = "CPU";
std::map<std::string, std::vector<int64_t>> ov_shape_infos;
std::vector<std::string> ov_cpu_operators;
// ======Only for RKNPU2 Backend=======
fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ =
fastdeploy::rknpu2::CpuName::RK3588;
fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ =
fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
// ======Only for KunlunXin XPU Backend=======
int kunlunxin_l3_workspace_size = 0xfffc00;
bool kunlunxin_locked = false;
bool kunlunxin_autotune = true;
std::string kunlunxin_autotune_file = "";
std::string kunlunxin_precision = "int16";
bool kunlunxin_adaptive_seqlen = false;
bool kunlunxin_enable_multi_stream = false;
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
// format of input model
ModelFormat model_format = ModelFormat::PADDLE;
std::string model_buffer_ = "";
std::string params_buffer_ = "";
size_t model_buffer_size_ = 0;
size_t params_buffer_size_ = 0;
bool model_from_memory_ = false;
};
} // namespace fastdeploy
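For the model_from_memory_ path declared above, a sketch of feeding an in-memory Paddle model to SetModelBuffer; ReadFileToString is a hypothetical helper standing in for whatever produces the buffers:

#include <fstream>
#include <sstream>
#include <string>

#include "fastdeploy/runtime/runtime_option.h"

// Hypothetical helper: read a whole file into a std::string.
static std::string ReadFileToString(const std::string& path) {
  std::ifstream file(path, std::ios::binary);
  std::ostringstream buffer;
  buffer << file.rdbuf();
  return buffer.str();
}

void ModelFromMemoryExample() {
  std::string model_buf = ReadFileToString("model.pdmodel");
  std::string params_buf = ReadFileToString("model.pdiparams");

  fastdeploy::RuntimeOption option;
  option.SetModelBuffer(model_buf.data(), model_buf.size(),
                        params_buf.data(), params_buf.size(),
                        fastdeploy::ModelFormat::PADDLE);
  option.UsePaddleInferBackend();  // PaddleBackend honors model_from_memory_
}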


@@ -75,14 +75,14 @@ YOLOv7End2EndTRT::YOLOv7End2EndTRT(const std::string& model_file,
   runtime_option.model_format = model_format;
   runtime_option.model_file = model_file;
   if (runtime_option.device != Device::GPU) {
-    FDWARNING << Str(runtime_option.device)
+    FDWARNING << runtime_option.device
               << " is not support for YOLOv7End2EndTRT,"
               << "will fallback to Device::GPU." << std::endl;
     runtime_option.device = Device::GPU;
   }
   if (runtime_option.backend != Backend::UNKNOWN) {
     if (runtime_option.backend != Backend::TRT) {
-      FDWARNING << Str(runtime_option.backend)
+      FDWARNING << runtime_option.backend
                 << " is not support for YOLOv7End2EndTRT,"
                 << "will fallback to Backend::TRT." << std::endl;
       runtime_option.backend = Backend::TRT;
@@ -347,4 +347,4 @@ bool YOLOv7End2EndTRT::Predict(cv::Mat* im, DetectionResult* result,
 }  // namespace detection
 }  // namespace vision
 }  // namespace fastdeploy