Support Poros Backend (#188)

* Add poros backend

* Add torch lib

* Add python3 lib

* set c++ 14 for poros

* fixed bugs

* fixed grammar bugs

* fixed grammar bugs

* fixed code bugs

* fixed code bugs

* fixed CreatePorosValue bug

* Add AtType2String for Log

* fixed trt_option

* fixed poros.cmake path

* fixed grammar bug

* fixed grammar bug

* fixed ambiguous reference

* fixed ambiguous reference

* fixed reference error

* fixed include files

* rm ENABLE_TRT_BACKEND in poros

* update CMakeLists.txt

* fixed CMakeLists.txt

* Add libtorch.so in CMakeLists.txt

* Fixed CMakeLists.txt

* Fixed CMakeLists.txt

* Fixed copy bug

* Fixed copy bug

* Fixed copy bug

* Fixed Cmake

* Fixed Cmake

* debug

* debug

* debug

* debug

* debug

* debug

* debug utils

* debug utils

* copy to cpu

* rm log info

* test share mem

* test share mem

* test share mem

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* time cost

* time cost

* fixed bug

* time collect

* mem copy

* mem copy

* rm time log

* rm share mem

* fixed multi inputs bug

* add set_input_dtypes func

* add SetInputDtypes

* fixed bug

* fixed bug

* fixed prewarm data order

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* fixed bug

* Add compile func

* Add compile func

* Add compile func

* Add is_dynamic option

* Add is_dynamic option

* Add is_dynamic option

* Add is_dynamic option

* rm infer log

* add cuda11.6 poros lib

* fixed bug

* fixed bug

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* rm logs

* test

* test

* test

* add test log

* add test log

* add test log

* add test log

* support cpu

* support cpu

* support cpu

* support cpu

* support member variable definition

* rm useless log

* fixed name

* resolve conflict

* resolve conflict

* resolve conflict

* fixed cmake

* add GetInputInfos&GetOutputInfos

* add GetInputInfos&GetOutputInfos

* fixed bug

* fixed runtime.py

* add compile func

* add np

* deal with comments

* rm to_inter func

* add property
WJJ1995
2022-10-17 15:28:12 +08:00
committed by GitHub
parent c8db2dd1ef
commit f5c94e5471
19 changed files with 1333 additions and 12 deletions

CMakeLists.txt Normal file → Executable file

@@ -51,10 +51,11 @@ endif()
############################# Basic Options for FastDeploy ################################
option(ENABLE_PADDLE_FRONTEND "Whether to enable PaddlePaddle frontend to support load paddle model in fastdeploy." ON)
option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu/poros-gpu" OFF)
option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF)
option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF)
option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF)
option(ENABLE_POROS_BACKEND "Whether to enable poros backend." OFF)
option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF)
option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
option(ENABLE_VISION "Whether to enable vision models usage." OFF)
@@ -108,6 +109,9 @@ if(WIN32)
if(ENABLE_PADDLE_BACKEND)
message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON doesn't support on non 64-bit system now.")
endif()
if(ENABLE_POROS_BACKEND)
message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.")
endif()
if(ENABLE_VISION)
message(FATAL_ERROR "-DENABLE_VISION=ON doesn't support on non 64-bit system now.")
endif()
@@ -165,13 +169,14 @@ file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastde
file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc)
file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc)
file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/paddle/*.cc)
file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc)
file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc)
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS})
set(DEPEND_LIBS "")
@@ -228,6 +233,62 @@ if(ENABLE_OPENVINO_BACKEND)
include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake)
endif()
if(ENABLE_POROS_BACKEND)
set(CMAKE_CXX_STANDARD 14)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
add_definitions(-DENABLE_POROS_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_POROS_SRCS})
include(${PROJECT_SOURCE_DIR}/cmake/poros.cmake)
list(APPEND DEPEND_LIBS external_poros)
set(PYTHON_MINIMUM_VERSION 3.6)
set(PYTORCH_MINIMUM_VERSION 1.9)
set(TENSORRT_MINIMUM_VERSION 8.0)
# find python3
find_package(Python3 ${PYTHON_MINIMUM_VERSION} REQUIRED COMPONENTS Interpreter Development)
message(STATUS "Found Python: ${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}.${Python3_VERSION_PATCH}")
if (NOT Python3_SITELIB)
message(FATAL_ERROR "site-packages not found. ")
else ()
message(STATUS "site-packages: ${Python3_SITELIB}")
endif ()
# find pytorch
find_package(Torch ${PYTORCH_MINIMUM_VERSION} REQUIRED HINTS ${Python3_SITELIB})
include_directories(${TORCH_INCLUDE_DIRS})
include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/common)
list(APPEND DEPEND_LIBS ${TORCH_LIBRARY})
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch")
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch")
endif()
if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib")
file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib")
endif()
find_package(Python COMPONENTS Interpreter Development REQUIRED)
message(STATUS "Copying ${TORCH_INSTALL_PREFIX}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib ...")
execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TORCH_INSTALL_PREFIX}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib)
# find trt
if(NOT WITH_GPU)
message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must set -DWITH_GPU=ON, but now it's OFF")
endif()
if(NOT TRT_DIRECTORY)
message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must define -DTRT_DIRECTORY, e.g -DTRT_DIRECTORY=/Downloads/TensorRT-8.4")
endif()
include_directories(${TRT_DIRECTORY}/include)
find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB})
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")
endif()
if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
endif()
find_package(Python COMPONENTS Interpreter Development REQUIRED)
message(STATUS "Copying ${TRT_DIRECTORY}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib ...")
execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib)
endif()
if(WITH_GPU)
if(APPLE)
message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.")
@@ -292,7 +353,7 @@ if(ENABLE_TRT_BACKEND)
OUTPUT_VARIABLE curr_out
ERROR_VARIABLE curr_out)
if(ret EQUAL "1")
message(FATAL_ERROR "Failed to patchelf tensorrt libraries.")
endif()
message(STATUS "result:${result} out:${curr_out}")
endif()

FastDeploy.cmake.in Normal file → Executable file

@@ -5,6 +5,8 @@ set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@)
set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
set(ENABLE_POROS_BACKEND @ENABLE_POROS_BACKEND@)
set(POROS_VERSION @POROS_VERSION@)
set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
set(ENABLE_VISION @ENABLE_VISION@)
@@ -106,6 +108,12 @@ if(ENABLE_LITE_BACKEND)
endif()
endif()
if(ENABLE_POROS_BACKEND)
find_library(POROS_LIB poros ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/lib NO_DEFAULT_PATH)
list(APPEND FASTDEPLOY_LIBS ${POROS_LIB})
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/include)
endif()
if(WITH_GPU)
if (NOT CUDA_DIRECTORY)
set(CUDA_DIRECTORY "/usr/local/cuda")
@@ -212,12 +220,16 @@ message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
message(STATUS " WITH_GPU : ${WITH_GPU}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
message(STATUS " ENABLE_LITE_BACKEND : ${ENABLE_LITE_BACKEND}")
if(ENABLE_PADDLE_BACKEND)
message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}")
endif()
if(ENABLE_POROS_BACKEND)
message(STATUS " Poros version : ${POROS_VERSION}")
endif()
if(ENABLE_OPENVINO_BACKEND)
message(STATUS " OpenVINO version : ${OPENVINO_VERSION}")
endif()

cmake/poros.cmake Executable file

@@ -0,0 +1,76 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include(ExternalProject)
set(POROS_PROJECT "extern_poros")
set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros)
set(POROS_SOURCE_DIR
${THIRD_PARTY_PATH}/poros/src/${POROS_PROJECT})
set(POROS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/poros)
set(POROS_INC_DIR
"${POROS_INSTALL_DIR}/include"
CACHE PATH "poros include directory." FORCE)
set(POROS_LIB_DIR
"${POROS_INSTALL_DIR}/lib/"
CACHE PATH "poros lib directory." FORCE)
set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}"
"${POROS_LIB_DIR}")
include_directories(${POROS_INC_DIR})
if(WIN32)
message(FATAL_ERROR "Poros Backend doesn't support Windows now.")
elseif(APPLE)
message(FATAL_ERROR "Poros Backend doesn't support Mac OSX now.")
else()
set(POROS_COMPILE_LIB
"${POROS_INSTALL_DIR}/lib/libporos.so"
CACHE FILEPATH "poros compile library." FORCE)
endif(WIN32)
set(POROS_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
set(POROS_VERSION "0.1.0")
if(WIN32)
message(FATAL_ERROR "Poros Backend doesn't support Windows now.")
elseif(APPLE)
message(FATAL_ERROR "Poros Backend doesn't support Mac OSX now.")
else()
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.")
set(POROS_FILE "poros-linux-aarch64-${POROS_VERSION}.tgz")
else()
set(POROS_FILE "poros-linux-x64-${POROS_VERSION}.tgz")
if(WITH_GPU)
set(POROS_FILE "poros-linux-x64-gpu-${POROS_VERSION}.tgz")
endif()
endif()
endif()
set(POROS_URL "${POROS_URL_BASE}${POROS_FILE}")
ExternalProject_Add(
${POROS_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${POROS_URL}
PREFIX ${POROS_PREFIX_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${POROS_SOURCE_DIR} ${POROS_INSTALL_DIR}
BUILD_BYPRODUCTS ${POROS_COMPILE_LIB})
add_library(external_poros STATIC IMPORTED GLOBAL)
set_property(TARGET external_poros PROPERTY IMPORTED_LOCATION
${POROS_COMPILE_LIB})
add_dependencies(external_poros ${POROS_PROJECT})

cmake/summary.cmake Normal file → Executable file

@@ -32,6 +32,7 @@ function(fastdeploy_summary)
message(STATUS " Paddle2ONNX version : ${PADDLE2ONNX_VERSION}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
if(ENABLE_ORT_BACKEND)
@@ -40,6 +41,9 @@ function(fastdeploy_summary)
if(ENABLE_PADDLE_BACKEND)
message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}")
endif()
if(ENABLE_POROS_BACKEND)
message(STATUS " Poros version : ${POROS_VERSION}")
endif()
if(ENABLE_OPENVINO_BACKEND)
message(STATUS " OpenVINO version : ${OPENVINO_VERSION}")
endif()

docs/api_docs/cpp/main_page.md Normal file → Executable file

@@ -11,6 +11,7 @@ Currently, FastDeploy supported backends listed as below,
| ONNX Runtime | CPU/Nvidia GPU | Paddle/ONNX | Windows(x64)/Linux(x64/aarch64)/Mac(x86/arm64) |
| TensorRT | Nvidia GPU | Paddle/ONNX | Windows(x64)/Linux(x64)/Jetson |
| OpenVINO | CPU | Paddle/ONNX | Windows(x64)/Linux(x64)/Mac(x86) |
| Poros | CPU/Nvidia GPU | TorchScript | Linux(x64) |
### Example code
- [Python examples](./)


@@ -0,0 +1,167 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <algorithm>
#include <unordered_map>
#include <set>
#include "torch/script.h"
#include "iengine.h"
#include "poros_module.h"
namespace baidu {
namespace mirana {
namespace poros {
/**
* @brief compile graph
*
* @param [in] module : original module
* @param [in] prewarm_datas : prewarm data
* @param [in] options : inference options
* @return PorosModule
* @retval !nullptr => succeed nullptr => failed
**/
std::unique_ptr<PorosModule> Compile(const torch::jit::Module& module,
const std::vector<std::vector<c10::IValue> >& prewarm_datas,
const PorosOptions& options);
class Compiler {
public:
typedef std::unordered_map<const torch::jit::Node*, IEngine*> engine_map_t;
typedef std::vector<std::vector<c10::IValue> > ivalue_vec_t;
Compiler() : _origin_module(NULL) {}
~Compiler();
/**
* @brief initial Compiler
*
* @param [in] options : poros options
* @return int
* @retval 0 => succeed <0 => failed
**/
int init(const PorosOptions& options);
/**
* @brief compile whole graph
*
* @param [in] origin_module
* @param [in] prewarm_datas : ivalue_vec_t, vector of IValue
* @param [out] optimized_module : optimized graph
* @return int
* @retval 0 => succeed <0 => failed
**/
int compile(const torch::jit::Module& origin_module,
const ivalue_vec_t& prewarm_datas,
torch::jit::Module* optimized_module);
private:
/**
* @brief preprocess this calculation graph
*
* @param [in] prewarm_datas : ivalue_vec_t, vector of IValue
* @param [out] graph : preprocessed graph
* @return int
* @retval 0 => succeed <0 => failed
**/
int preprocess_graph(const ivalue_vec_t& prewarm_datas, std::shared_ptr<torch::jit::Graph>& graph);
/**
* @brief segment this calculation graph
*
* @param [in/out] graph
* @return int
* @retval 0 => succeed <0 => failed
**/
int segment_graph(std::shared_ptr<torch::jit::Graph>& graph);
// Split the subgraph (block).
// The divided subgraph is associated with the block.
int segment_block(torch::jit::Block& block, IEngine* engine, int current_depth);
// Subgraph optimization
/**
* @brief Subgraph optimization
*
* @param [in] prewarm_datas : ivalue_vec_t, vector of IValue
* @param [in] opt_graph : graph to be optimized
* @param [out] optimized_module : optimized graph
* @return int
* @retval 0 => succeed <0 => failed
**/
int optimize_subgraph(const ivalue_vec_t& prewarm_datas,
const std::shared_ptr<torch::jit::Graph>& opt_graph,
torch::jit::Module* optimized_module);
// Subgraph optimization(block)
int optimize_subblock(torch::jit::Block* block,
torch::jit::Module* optimized_module);
/**
* @brief Compile the subgraph into a new graph based on the engine
*
* @param [in] engine : The engine used by the subgraph
* @param [in] subgraph_node : Subgraph node
* @return [out] module : Transformed model
* @retval 0 => succeed <0 => failed
**/
int transform(IEngine* engine, torch::jit::Node& subgraph_node,
torch::jit::Module& module);
/**
* @brief Select engine based on subgraph and options
*
* @param [in] node : Jit Node
* @return IEngine*
* @retval !nullptr => succeed nullptr => failed
**/
IEngine* select_engine(const torch::jit::Node* n);
/**
* @brief destroy
*
* @return void
**/
void close();
private:
int _max_segment_depth{5}; // Maximum subgraph segmentation depth
ivalue_vec_t _prewarm_datas; // Prewarm datas
PorosOptions _options;
engine_map_t _engine_map; // The engine used to record the subgraph
const torch::jit::Module* _origin_module; // Origin_module
std::atomic<int> _engine_index = {0}; // Record engine index
};
/**
* @brief compile graph, internal use
*
* @param [in] module : Origin module
* @param [in] prewarm_datas : prewarm data
* @param [in] options : Inference options
* @return optimized_module
* @retval !nullptr => succeed nullptr => failed
**/
std::unique_ptr<torch::jit::Module> CompileGraph(const torch::jit::Module& module,
const std::vector<std::vector<c10::IValue> >& prewarm_datas,
const PorosOptions& options);
} // namespace poros
} // namespace mirana
} // namespace baidu
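
For orientation, here is a minimal sketch of how this compile entry point is driven. It is illustrative only: the model path and input shape are assumptions, and the flow simply mirrors what PorosBackend::Compile does further below.

// Sketch: compiling a TorchScript module with Poros (illustrative).
#include <memory>
#include <vector>
#include "torch/script.h"
#include "fastdeploy/backends/poros/poros_backend.h"  // pulls in common/compile.h

int main() {
  // Load and prepare the original TorchScript module ("model.pt" is assumed).
  torch::jit::Module mod = torch::jit::load("model.pt");
  mod.eval();
  mod.to(at::kCUDA);

  // One group of prewarm inputs; Poros traces shapes/dtypes from these.
  std::vector<std::vector<c10::IValue>> prewarm_datas;
  prewarm_datas.push_back({at::randn({1, 3, 224, 224}, {at::kCUDA})});

  baidu::mirana::poros::PorosOptions options;
  options.device = baidu::mirana::poros::Device::GPU;

  // nullptr signals a failed compilation (see @retval above).
  auto poros_mod = baidu::mirana::poros::Compile(mod, prewarm_datas, options);
  if (poros_mod == nullptr) return -1;

  // PorosModule derives from torch::jit::Module, so forward() works as usual.
  auto out = poros_mod->forward({at::randn({1, 3, 224, 224}, {at::kCUDA})});
  return 0;
}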


@@ -0,0 +1,84 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
//from pytorch
#include "torch/script.h"
#include "torch/csrc/jit/ir/ir.h"
#include "ATen/core/interned_strings.h"
#include "plugin_create.h"
namespace baidu {
namespace mirana {
namespace poros {
/**
* the base engine class
* every registered engine should inherit from this IEngine
**/
struct PorosGraph {
torch::jit::Graph* graph = NULL;
torch::jit::Node* node = NULL;
};
typedef uint64_t EngineID;
class IEngine : public IPlugin, public torch::CustomClassHolder{
public:
virtual ~IEngine() {}
/**
* @brief init; the engine must be fully initialized when init() returns success
* @return int
* @retval 0 => success, <0 => fail
**/
virtual int init() = 0;
/**
* @brief During compilation, the subgraph is converted into the corresponding engine's graph representation and stored inside the engine, so that excute_engine can run it at inference time
* @param [in] sub_graph : subgraph
* @return [res]int
* @retval 0 => success, <0 => fail
**/
virtual int transform(const PorosGraph& sub_graph) = 0;
/**
* @brief Execution logic for the subgraph at runtime
* @param [in] inputs : input tensor
* @return [res] output tensor
**/
virtual std::vector<at::Tensor> excute_engine(const std::vector<at::Tensor>& inputs) = 0;
virtual void register_module_attribute(const std::string& name, torch::jit::Module& module) = 0;
// Identifier of the engine
virtual const std::string who_am_i() = 0;
// Whether the node is supported by the current engine
bool is_node_supported(const torch::jit::Node* node);
public:
std::pair<uint64_t, uint64_t> _num_io; // Number of input/output parameters
EngineID _id;
};
} // namespace poros
} // namespace mirana
} // namespace baidu


@@ -0,0 +1,65 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <unordered_map>
#include <string>
namespace baidu {
namespace mirana {
namespace poros {
class IPlugin {
public:
virtual ~IPlugin() {}
virtual const std::string who_am_i() = 0;
};
typedef IPlugin* (*plugin_creator_t)();
typedef std::unordered_map<std::string, plugin_creator_t> plugin_creator_map_t;
IPlugin* create_plugin(const std::string& plugin_name);
IPlugin* create_plugin(const std::string& plugin_name, const plugin_creator_map_t& plugin_creator_map);
void create_all_plugins(const plugin_creator_map_t& plugin_creator_map,
std::unordered_map<std::string, IPlugin*>& plugin_m);
//void create_all_plugins(std::unordered_map<std::string, IPlugin*>& plugin_m);
template <typename PluginType>
IPlugin* default_plugin_creator() {
return new (std::nothrow)PluginType;
}
void register_plugin_creator(const std::string& plugin_name, plugin_creator_t creator);
void register_plugin_creator(const std::string& plugin_name,
plugin_creator_t creator, plugin_creator_map_t& plugin_creator_map);
template <typename PluginType>
void register_plugin_class(const std::string& plugin_name) {
return register_plugin_creator(plugin_name, default_plugin_creator<PluginType>);
}
// This version is recommended
template <typename PluginType>
void register_plugin_class(const std::string& plugin_name, plugin_creator_map_t& plugin_creator_map) {
return register_plugin_creator(plugin_name, default_plugin_creator<PluginType>, plugin_creator_map);
}
}//poros
}//mirana
}//baidu
/* vim: set ts=4 sw=4 sts=4 tw=100 */
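A short sketch of how this registry is intended to be used; DummyEngine here is an invented example class, not part of the PR.

// Sketch: registering a plugin creator and instantiating it by name
// (DummyEngine and the local creator map are illustrative).
#include <string>
#include <unordered_map>
#include "plugin_create.h"

namespace poros = baidu::mirana::poros;

class DummyEngine : public poros::IPlugin {
 public:
  const std::string who_am_i() override { return "DummyEngine"; }
};

int main() {
  poros::plugin_creator_map_t creator_map;
  // The map-based overload is the one the header marks as recommended.
  poros::register_plugin_class<DummyEngine>("DummyEngine", creator_map);

  poros::IPlugin* plugin = poros::create_plugin("DummyEngine", creator_map);
  bool ok = plugin != nullptr && plugin->who_am_i() == "DummyEngine";
  delete plugin;
  return ok ? 0 : 1;
}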


@@ -0,0 +1,67 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "torch/script.h"
#include "torch/csrc/jit/jit_log.h"
// #include "ATen/Context.h"
namespace baidu {
namespace mirana {
namespace poros {
enum Device : int8_t {
GPU = 0,
CPU,
XPU,
UNKNOW
};
struct PorosOptions {
Device device = GPU;
bool debug = false;
bool use_fp16 = false;
bool is_dynamic = false;
bool long_to_int = true;
uint64_t max_workspace_size = 1ULL << 30;
int32_t device_id = -1;
int32_t unconst_ops_thres = -1;
bool use_nvidia_tf32 = false;
};
class PorosModule : public torch::jit::Module {
public:
PorosModule(torch::jit::Module module) : torch::jit::Module(module) {
}
~PorosModule() = default;
void to_device(Device device){
_options.device = device;
}
//c10::IValue forward(std::vector<c10::IValue> inputs);
//void save(const std::string& filename);
public:
PorosOptions _options;
};
// load a module previously saved via PorosModule::save
std::unique_ptr<PorosModule> Load(const std::string& filename, const PorosOptions& options);
} // namespace poros
} // namespace mirana
} // namespace baidu
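
A sketch of the corresponding load path for a previously saved module; the "model.poros" filename is an assumption, and InitFromPoros in poros_backend.cc below follows the same pattern.

// Sketch: loading a module saved via PorosModule::save (illustrative path).
#include <memory>
#include "fastdeploy/backends/poros/common/poros_module.h"

int main() {
  baidu::mirana::poros::PorosOptions options;
  options.device = baidu::mirana::poros::Device::GPU;
  options.device_id = 0;

  std::unique_ptr<baidu::mirana::poros::PorosModule> mod =
      baidu::mirana::poros::Load("model.poros", options);
  return mod == nullptr ? 1 : 0;  // nullptr => load failed
}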


@@ -0,0 +1,240 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/poros/poros_backend.h"
#include <sys/time.h>
namespace fastdeploy {
TensorInfo PorosBackend::GetInputInfo(int index) {
// eager mode can't obtain input information before infer
TensorInfo info_input;
return info_input;
}
TensorInfo PorosBackend::GetOutputInfo(int index) {
// eager mode can't obtain output information before infer
TensorInfo info_output;
return info_output;
}
std::vector<TensorInfo> PorosBackend::GetInputInfos() {
// eager mode can't obtain inputs information before infer
std::vector<TensorInfo> info_inputs;
return info_inputs;
}
std::vector<TensorInfo> PorosBackend::GetOutputInfos() {
// eager mode can't obtain outputs information before infer
std::vector<TensorInfo> info_outputs;
return info_outputs;
}
void PorosBackend::BuildOption(const PorosBackendOption& option) {
_options.device = option.use_gpu ? baidu::mirana::poros::Device::GPU
: baidu::mirana::poros::Device::CPU;
_options.long_to_int = option.long_to_int;
_options.use_nvidia_tf32 = option.use_nvidia_tf32;
_options.device_id = option.gpu_id;
_options.unconst_ops_thres = option.unconst_ops_thres;
_options.is_dynamic = option.is_dynamic;
_options.max_workspace_size = option.max_workspace_size;
_options.use_fp16 = option.enable_fp16;
return;
}
bool PorosBackend::Compile(const std::string& model_file,
std::vector<std::vector<FDTensor>>& prewarm_tensors,
const PorosBackendOption& option) {
if (initialized_) {
FDERROR << "PorosBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
BuildOption(option);
torch::jit::Module mod;
mod = torch::jit::load(model_file);
mod.eval();
if (option.use_gpu) {
mod.to(at::kCUDA);
} else {
mod.to(at::kCPU);
}
// get inputs_nums and outputs_nums
auto graph = mod.get_method("forward").graph();
auto inputs = graph->inputs();
// remove self node
_numinputs = inputs.size() - 1;
// FDTensor to at::Tensor
std::vector<std::vector<c10::IValue>> prewarm_datas;
bool is_backend_cuda = option.use_gpu ? true : false;
for (size_t i = 0; i < prewarm_tensors.size(); ++i) {
std::vector<c10::IValue> prewarm_data;
for (size_t j = 0; j < prewarm_tensors[i].size(); ++j) {
auto tensor = CreatePorosValue(prewarm_tensors[i][j], is_backend_cuda);
prewarm_data.push_back(tensor);
}
prewarm_datas.push_back(prewarm_data);
}
// get outputs nums
auto temp_result = mod.forward(prewarm_datas[0]);
size_t outputs_nums = 0;
if (temp_result.isTensor()) {
outputs_nums += 1;
} else if (temp_result.isTuple()) {
auto temp_result_tuple = temp_result.toTuple();
for (size_t i = 0; i < temp_result_tuple->elements().size(); ++i) {
auto poros_tensor = temp_result_tuple->elements()[i];
if (poros_tensor.isTensor()) {
outputs_nums += 1;
} else if (poros_tensor.isList()) {
auto poros_tensor_list = poros_tensor.toList();
outputs_nums += poros_tensor_list.size();
} else if (poros_tensor.isTuple()) {
auto poros_tensor_tuple = poros_tensor.toTuple();
outputs_nums += poros_tensor_tuple->elements().size();
} else {
continue;
}
}
}
_numoutputs = outputs_nums;
_poros_module = baidu::mirana::poros::Compile(mod, prewarm_datas, _options);
if (_poros_module == nullptr) {
FDERROR << "PorosBackend initlize Failed, try initialize again."
<< std::endl;
return false;
}
initialized_ = true;
return true;
}
bool PorosBackend::InitFromTorchScript(const std::string& model_file,
const PorosBackendOption& option) {
if (initialized_) {
FDERROR << "PorosBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
if (option.poros_file != "") {
std::ifstream fin(option.poros_file, std::ios::binary | std::ios::in);
if (fin) {
FDINFO << "Detect compiled Poros file in " << option.poros_file
<< ", will load it directly." << std::endl;
fin.close();
return InitFromPoros(option.poros_file, option);
}
}
BuildOption(option);
torch::jit::Module mod;
mod = torch::jit::load(model_file);
mod.eval();
if (option.use_gpu) {
mod.to(at::kCUDA);
} else {
mod.to(at::kCPU);
}
// get inputs_nums and outputs_nums
auto graph = mod.get_method("forward").graph();
auto inputs = graph->inputs();
// remove self node
_numinputs = inputs.size() - 1;
auto outputs = graph->outputs();
_numoutputs = outputs.size();
_poros_module = baidu::mirana::poros::Compile(mod, _prewarm_datas, _options);
if (_poros_module == nullptr) {
FDERROR << "PorosBackend initlize Failed, try initialize again."
<< std::endl;
return false;
}
initialized_ = true;
return true;
}
bool PorosBackend::InitFromPoros(const std::string& model_file,
const PorosBackendOption& option) {
if (initialized_) {
FDERROR << "PorosBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
BuildOption(option);
_poros_module = baidu::mirana::poros::Load(model_file, _options);
if (_poros_module == nullptr) {
FDERROR << "PorosBackend initlize Failed, try initialize again."
<< std::endl;
return false;
}
// get inputs_nums and outputs_nums
auto graph = _poros_module->get_method("forward").graph();
auto inputs = graph->inputs();
// remove self node
_numinputs = inputs.size() - 1;
auto outputs = graph->outputs();
_numoutputs = outputs.size();
initialized_ = true;
return true;
}
bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
// Convert FD Tensor to PyTorch Tensor
std::vector<torch::jit::IValue> poros_inputs;
bool is_backend_cuda =
_options.device == baidu::mirana::poros::Device::GPU ? true : false;
for (size_t i = 0; i < inputs.size(); ++i) {
poros_inputs.push_back(CreatePorosValue(inputs[i], is_backend_cuda));
}
// Infer
auto poros_outputs = _poros_module->forward(poros_inputs);
// Convert PyTorch Tensor to FD Tensor
if (poros_outputs.isTensor()) {
CopyTensorToCpu(poros_outputs.toTensor(), &((*outputs)[0]),
is_backend_cuda);
} else if (poros_outputs.isTuple()) {
// deal with multi outputs
auto poros_outputs_tuple = poros_outputs.toTuple();
size_t index = 0;
for (size_t i = 0; i < poros_outputs_tuple->elements().size(); ++i) {
auto poros_tensor = poros_outputs_tuple->elements()[i];
if (poros_tensor.isTensor()) {
CopyTensorToCpu(poros_tensor.toTensor(), &((*outputs)[index]),
is_backend_cuda);
index += 1;
} else if (poros_tensor.isList()) {
auto poros_tensor_list = poros_tensor.toList();
for (const auto list_idx : c10::irange(0, poros_tensor_list.size())) {
const auto& elt = poros_tensor_list.get(list_idx);
CopyTensorToCpu(elt.toTensor(), &((*outputs)[index]),
is_backend_cuda);
index += 1;
}
} else if (poros_tensor.isTuple()) {
auto poros_tensor_tuple = poros_tensor.toTuple();
for (size_t j = 0; j < poros_tensor_tuple->elements().size(); ++j) {
CopyTensorToCpu(poros_tensor_tuple->elements()[j].toTensor(),
&((*outputs)[index]), is_backend_cuda);
index += 1;
}
} else {
continue;
}
}
} else {
FDERROR << "Convert to FDTensor Failed!!!!!" << std::endl;
}
return true;
}
} // namespace fastdeploy


@@ -0,0 +1,107 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/poros/common/compile.h"
#include "fastdeploy/backends/poros/common/poros_module.h"
namespace fastdeploy {
struct PorosBackendOption {
#ifdef WITH_GPU
bool use_gpu = true;
#else
bool use_gpu = false;
#endif
int gpu_id = 0;
bool long_to_int = true;
// TF32 mode on A10 trades some numerical precision for speed; it can bring
// a performance improvement, but results may differ slightly from FP32
bool use_nvidia_tf32 = false;
// Threshold for the number of non-const ops
int32_t unconst_ops_thres = -1;
std::string poros_file = "";
std::vector<FDDataType> prewarm_datatypes = {FDDataType::FP32};
// TRT options
bool enable_fp16 = false;
bool enable_int8 = false;
bool is_dynamic = false;
size_t max_batch_size = 32;
size_t max_workspace_size = 1 << 30;
};
// Convert data type from fastdeploy to poros
at::ScalarType GetPorosDtype(const FDDataType& fd_dtype);
// Convert data type from poros to fastdeploy
FDDataType GetFdDtype(const at::ScalarType& dtype);
// at::ScalarType to std::string for FDERROR
std::string AtType2String(const at::ScalarType& dtype);
// Create at::Tensor
// is_backend_cuda specifies whether Poros uses the GPU device;
// when is_backend_cuda = true, tensor.device is expected to be Device::GPU
at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda = false);
// Copy memory data from at::Tensor to fastdeploy::FDTensor
void CopyTensorToCpu(const at::Tensor& tensor, FDTensor* fd_tensor,
bool is_backend_cuda = false);
class PorosBackend : public BaseBackend {
public:
PorosBackend() {}
virtual ~PorosBackend() = default;
void BuildOption(const PorosBackendOption& option);
bool InitFromTorchScript(
const std::string& model_file,
const PorosBackendOption& option = PorosBackendOption());
bool InitFromPoros(const std::string& model_file,
const PorosBackendOption& option = PorosBackendOption());
bool Compile(const std::string& model_file,
std::vector<std::vector<FDTensor>>& prewarm_tensors,
const PorosBackendOption& option = PorosBackendOption());
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs() const { return _numinputs; }
int NumOutputs() const { return _numoutputs; }
TensorInfo GetInputInfo(int index) override;
TensorInfo GetOutputInfo(int index) override;
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;
private:
baidu::mirana::poros::PorosOptions _options;
std::unique_ptr<baidu::mirana::poros::PorosModule> _poros_module;
std::vector<std::vector<c10::IValue>> _prewarm_datas;
int _numinputs = 1;
int _numoutputs = 1;
};
} // namespace fastdeploy
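
Putting the pieces together, a hedged sketch of driving PorosBackend directly; the model path and shapes are assumptions, and Runtime::Compile further below is the intended entry point.

// Sketch: using PorosBackend directly (illustrative; normally reached
// through Runtime::Compile).
#include <vector>
#include "fastdeploy/backends/poros/poros_backend.h"

int main() {
  fastdeploy::PorosBackendOption option;
  option.use_gpu = true;

  // One group of prewarm tensors matching the model's input signature.
  std::vector<std::vector<fastdeploy::FDTensor>> prewarm(1);
  prewarm[0].resize(1);
  prewarm[0][0].Resize({1, 3, 224, 224}, fastdeploy::FDDataType::FP32);

  fastdeploy::PorosBackend backend;
  if (!backend.Compile("model.pt", prewarm, option)) return 1;

  std::vector<fastdeploy::FDTensor> inputs(1);
  std::vector<fastdeploy::FDTensor> outputs(backend.NumOutputs());
  inputs[0].Resize({1, 3, 224, 224}, fastdeploy::FDDataType::FP32);
  return backend.Infer(inputs, &outputs) ? 0 : 1;
}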


@@ -0,0 +1,186 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/poros/poros_backend.h"
#ifdef WITH_GPU
#include <cuda_runtime_api.h>
#endif
namespace fastdeploy {
std::string AtType2String(const at::ScalarType& dtype) {
std::string out;
switch (dtype) {
case at::kByte:
out = "at::kByte";
break;
case at::kChar:
out = "at::kChar";
break;
case at::kShort:
out = "at::kShort";
break;
case at::kInt:
out = "at::kInt";
break;
case at::kLong:
out = "at::kLong";
break;
case at::kHalf:
out = "at::kHalf";
break;
case at::kFloat:
out = "at::kFloat";
break;
case at::kDouble:
out = "at::kDouble";
break;
default:
out = "at::UNKNOWN";
}
return out;
}
at::ScalarType GetPorosDtype(const FDDataType& fd_dtype) {
if (fd_dtype == FDDataType::FP32) {
return at::kFloat;
} else if (fd_dtype == FDDataType::FP64) {
return at::kDouble;
} else if (fd_dtype == FDDataType::INT32) {
return at::kInt;
} else if (fd_dtype == FDDataType::INT64) {
return at::kLong;
}
FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "."
<< std::endl;
return at::kFloat;
}
FDDataType GetFdDtype(const at::ScalarType& poros_dtype) {
if (poros_dtype == at::kFloat) {
return FDDataType::FP32;
} else if (poros_dtype == at::kDouble) {
return FDDataType::FP64;
} else if (poros_dtype == at::kInt) {
return FDDataType::INT32;
} else if (poros_dtype == at::kLong) {
return FDDataType::INT64;
}
FDERROR << "Unrecognized poros data type:" << AtType2String(poros_dtype)
<< "." << std::endl;
return FDDataType::FP32;
}
at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda) {
FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
"Only support tensor which device is CPU or GPU for PorosBackend.");
auto data_type = GetPorosDtype(tensor.dtype);
size_t numel = tensor.Numel();
at::Tensor poros_value;
if (is_backend_cuda) {
poros_value = std::move(
at::empty(tensor.shape, {at::kCUDA}).to(data_type).contiguous());
} else {
poros_value = std::move(
at::empty(tensor.shape, {at::kCPU}).to(data_type).contiguous());
}
if (data_type == at::kFloat) {
if (is_backend_cuda) {
cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(float), cudaMemcpyHostToDevice);
} else {
memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(float));
}
} else if (data_type == at::kInt) {
if (is_backend_cuda) {
cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(int32_t), cudaMemcpyHostToDevice);
} else {
memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(int32_t));
}
} else if (data_type == at::kLong) {
if (is_backend_cuda) {
cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(int64_t), cudaMemcpyHostToDevice);
} else {
memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(int64_t));
}
} else if (data_type == at::kDouble) {
if (is_backend_cuda) {
cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(double), cudaMemcpyHostToDevice);
} else {
memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(double));
}
} else {
FDASSERT(false,
"Unrecognized data type while calling "
"PorosBackend::CreatePorosValue().");
}
return poros_value;
}
void CopyTensorToCpu(const at::Tensor& tensor, FDTensor* fd_tensor,
bool is_backend_cuda) {
const auto data_type = tensor.scalar_type();
std::vector<int64_t> shape;
auto sizes = tensor.sizes();
for (size_t i = 0; i < sizes.size(); i++) {
shape.push_back(sizes[i]);
}
auto fd_dtype = GetFdDtype(data_type);
fd_tensor->Resize(shape, fd_dtype);
size_t numel = tensor.numel();
// at::Tensor -> FDTensor
if (data_type == at::kFloat) {
if (is_backend_cuda) {
cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float),
cudaMemcpyDeviceToHost);
} else {
memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float));
}
return;
} else if (data_type == at::kInt) {
if (is_backend_cuda) {
cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t),
cudaMemcpyDeviceToHost);
} else {
memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t));
}
return;
} else if (data_type == at::kLong) {
if (is_backend_cuda) {
cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t),
cudaMemcpyDeviceToHost);
} else {
memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t));
}
return;
} else if (data_type == at::kDouble) {
if (is_backend_cuda) {
cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double),
cudaMemcpyDeviceToHost);
} else {
memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double));
}
return;
}
}
} // namespace fastdeploy

fastdeploy/core/config.h.in Normal file → Executable file

@@ -29,6 +29,10 @@
#cmakedefine ENABLE_PADDLE_BACKEND
#endif
#ifndef ENABLE_POROS_BACKEND
#cmakedefine ENABLE_POROS_BACKEND
#endif
#ifndef ENABLE_OPENVINO_BACKEND
#cmakedefine ENABLE_OPENVINO_BACKEND
#endif


@@ -24,6 +24,7 @@ void BindRuntime(pybind11::module& m) {
.def("use_cpu", &RuntimeOption::UseCpu)
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
.def("use_poros_backend", &RuntimeOption::UsePorosBackend)
.def("use_ort_backend", &RuntimeOption::UseOrtBackend)
.def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel)
.def("use_trt_backend", &RuntimeOption::UseTrtBackend)
@@ -62,7 +63,12 @@ void BindRuntime(pybind11::module& m) {
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
.def_readwrite("trt_max_workspace_size",
&RuntimeOption::trt_max_workspace_size)
.def_readwrite("is_dynamic", &RuntimeOption::is_dynamic)
.def_readwrite("long_to_int", &RuntimeOption::long_to_int)
.def_readwrite("use_nvidia_tf32", &RuntimeOption::use_nvidia_tf32)
.def_readwrite("unconst_ops_thres", &RuntimeOption::unconst_ops_thres)
.def_readwrite("poros_file", &RuntimeOption::poros_file);
pybind11::class_<TensorInfo>(m, "TensorInfo")
.def_readwrite("name", &TensorInfo::name)
@@ -72,6 +78,30 @@ void BindRuntime(pybind11::module& m) {
pybind11::class_<Runtime>(m, "Runtime")
.def(pybind11::init())
.def("init", &Runtime::Init)
.def("compile",
[](Runtime& self,
std::vector<std::vector<pybind11::array>>& warm_datas,
const RuntimeOption& _option) {
size_t rows = warm_datas.size();
size_t columns = warm_datas[0].size();
std::vector<std::vector<FDTensor>> warm_tensors(
rows, std::vector<FDTensor>(columns));
for (size_t i = 0; i < rows; ++i) {
for (size_t j = 0; j < columns; ++j) {
auto dtype =
NumpyDataTypeToFDDataType(warm_datas[i][j].dtype());
std::vector<int64_t> data_shape;
data_shape.insert(
data_shape.begin(), warm_datas[i][j].shape(),
warm_datas[i][j].shape() + warm_datas[i][j].ndim());
warm_tensors[i][j].Resize(data_shape, dtype);
memcpy(warm_tensors[i][j].MutableData(),
warm_datas[i][j].mutable_data(),
warm_datas[i][j].nbytes());
}
}
return self.Compile(warm_tensors, _option);
})
.def("infer", .def("infer",
[](Runtime& self, std::vector<FDTensor>& inputs) { [](Runtime& self, std::vector<FDTensor>& inputs) {
std::vector<FDTensor> outputs(self.NumOutputs()); std::vector<FDTensor> outputs(self.NumOutputs());
@@ -121,11 +151,13 @@ void BindRuntime(pybind11::module& m) {
.value("UNKOWN", Backend::UNKNOWN) .value("UNKOWN", Backend::UNKNOWN)
.value("ORT", Backend::ORT) .value("ORT", Backend::ORT)
.value("TRT", Backend::TRT) .value("TRT", Backend::TRT)
.value("POROS", Backend::POROS)
.value("PDINFER", Backend::PDINFER) .value("PDINFER", Backend::PDINFER)
.value("LITE", Backend::LITE); .value("LITE", Backend::LITE);
pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(), pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(),
"ModelFormat for inference.") "ModelFormat for inference.")
.value("PADDLE", ModelFormat::PADDLE) .value("PADDLE", ModelFormat::PADDLE)
.value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT)
.value("ONNX", ModelFormat::ONNX); .value("ONNX", ModelFormat::ONNX);
pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(), pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
"Device for inference.") "Device for inference.")


@@ -29,6 +29,10 @@
#include "fastdeploy/backends/paddle/paddle_backend.h"
#endif
#ifdef ENABLE_POROS_BACKEND
#include "fastdeploy/backends/poros/poros_backend.h"
#endif
#ifdef ENABLE_OPENVINO_BACKEND
#include "fastdeploy/backends/openvino/ov_backend.h"
#endif
@@ -50,6 +54,9 @@ std::vector<Backend> GetAvailableBackends() {
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
#endif
#ifdef ENABLE_POROS_BACKEND
backends.push_back(Backend::POROS);
#endif
#ifdef ENABLE_OPENVINO_BACKEND
backends.push_back(Backend::OPENVINO);
#endif
@@ -76,6 +83,8 @@ std::string Str(const Backend& b) {
return "Backend::TRT";
} else if (b == Backend::PDINFER) {
return "Backend::PDINFER";
} else if (b == Backend::POROS) {
return "Backend::POROS";
} else if (b == Backend::OPENVINO) {
return "Backend::OPENVINO";
} else if (b == Backend::LITE) {
@@ -89,6 +98,8 @@ std::string Str(const ModelFormat& f) {
return "ModelFormat::PADDLE";
} else if (f == ModelFormat::ONNX) {
return "ModelFormat::ONNX";
} else if (f == ModelFormat::TORCHSCRIPT) {
return "ModelFormat::TORCHSCRIPT";
}
return "UNKNOWN-ModelFormat";
}
@@ -102,6 +113,8 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) {
out << "Backend::PDINFER";
} else if (backend == Backend::OPENVINO) {
out << "Backend::OPENVINO";
} else if (backend == Backend::POROS) {
out << "Backend::POROS";
} else if (backend == Backend::LITE) {
out << "Backend::LITE";
}
@@ -114,6 +127,8 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
out << "ModelFormat::PADDLE";
} else if (format == ModelFormat::ONNX) {
out << "ModelFormat::ONNX";
} else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT";
}
out << "UNKNOWN-ModelFormat";
return out;
@@ -137,9 +152,17 @@ bool CheckModelFormat(const std::string& model_file,
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::TORCHSCRIPT) {
if (model_file.size() < 3 ||
model_file.substr(model_file.size() - 3, 3) != ".pt") {
FDERROR << "With model format of ModelFormat::TORCHSCRIPT, the model file "
"should ends with `.pt`, but now it's "
<< model_file << std::endl;
return false;
}
} else {
FDERROR << "Only support model format with frontend ModelFormat::PADDLE / "
"ModelFormat::ONNX / ModelFormat::TORCHSCRIPT."
<< std::endl;
return false;
}
@@ -155,6 +178,10 @@ ModelFormat GuessModelFormat(const std::string& model_file) {
model_file.substr(model_file.size() - 5, 5) == ".onnx") { model_file.substr(model_file.size() - 5, 5) == ".onnx") {
FDINFO << "Model Format: ONNX." << std::endl; FDINFO << "Model Format: ONNX." << std::endl;
return ModelFormat::ONNX; return ModelFormat::ONNX;
} else if (model_file.size() > 3 &&
model_file.substr(model_file.size() - 3, 3) == ".pt") {
FDINFO << "Model Format: Torchscript." << std::endl;
return ModelFormat::TORCHSCRIPT;
} }
FDERROR << "Cannot guess which model format you are using, please set " FDERROR << "Cannot guess which model format you are using, please set "
@@ -173,10 +200,13 @@ void RuntimeOption::SetModelPath(const std::string& model_path,
} else if (format == ModelFormat::ONNX) { } else if (format == ModelFormat::ONNX) {
model_file = model_path; model_file = model_path;
model_format = ModelFormat::ONNX; model_format = ModelFormat::ONNX;
} else if (format == ModelFormat::TORCHSCRIPT) {
model_file = model_path;
model_format = ModelFormat::TORCHSCRIPT;
} else { } else {
FDASSERT( FDASSERT(
false, false,
"The model format only can be ModelFormat::PADDLE/ModelFormat::ONNX."); "The model format only can be ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
} }
} }
@@ -223,6 +253,15 @@ void RuntimeOption::UseOrtBackend() {
#endif
}
// use poros backend
void RuntimeOption::UsePorosBackend() {
#ifdef ENABLE_POROS_BACKEND
backend = Backend::POROS;
#else
FDASSERT(false, "The FastDeploy didn't compile with PorosBackend.");
#endif
}
void RuntimeOption::UseTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
backend = Backend::TRT;
@@ -324,6 +363,36 @@ void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
trt_serialize_file = cache_file_path;
}
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
option = _option;
auto poros_option = PorosBackendOption();
poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
poros_option.gpu_id = option.device_id;
poros_option.long_to_int = option.long_to_int;
poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
poros_option.unconst_ops_thres = option.unconst_ops_thres;
poros_option.poros_file = option.poros_file;
poros_option.is_dynamic = option.is_dynamic;
poros_option.enable_fp16 = option.trt_enable_fp16;
poros_option.max_batch_size = option.trt_max_batch_size;
poros_option.max_workspace_size = option.trt_max_workspace_size;
FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
"PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
backend_ = utils::make_unique<PorosBackend>();
auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
FDASSERT(
casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
"Load model from Torchscript failed while initliazing PorosBackend.");
#else
FDASSERT(false,
"PorosBackend is not available, please compiled with "
"ENABLE_POROS_BACKEND=ON.");
#endif
return true;
}
 bool Runtime::Init(const RuntimeOption& _option) {
   option = _option;
   if (option.model_format == ModelFormat::AUTOREC) {
@@ -334,6 +403,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
     option.backend = Backend::ORT;
   } else if (IsBackendAvailable(Backend::PDINFER)) {
     option.backend = Backend::PDINFER;
+  } else if (IsBackendAvailable(Backend::POROS)) {
+    option.backend = Backend::POROS;
   } else if (IsBackendAvailable(Backend::OPENVINO)) {
     option.backend = Backend::OPENVINO;
   } else {
@@ -365,6 +436,15 @@ bool Runtime::Init(const RuntimeOption& _option) {
     CreatePaddleBackend();
     FDINFO << "Runtime initialized with Backend::PDINFER in "
            << Str(option.device) << "." << std::endl;
+  } else if (option.backend == Backend::POROS) {
+    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
+             "Backend::POROS only supports Device::CPU/Device::GPU.");
+    FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
+             "Backend::POROS only supports model format of ModelFormat::TORCHSCRIPT.");
+    FDINFO << "Runtime initialized with Backend::POROS in "
+           << Str(option.device) << "." << std::endl;
+    return true;
   } else if (option.backend == Backend::OPENVINO) {
     FDASSERT(option.device == Device::CPU,
              "Backend::OPENVINO only supports Device::CPU");
@@ -379,7 +459,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
            << "." << std::endl;
   } else {
     FDERROR << "Runtime only support "
-               "Backend::ORT/Backend::TRT/Backend::PDINFER as backend now."
+               "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
+               "backend now."
             << std::endl;
     return false;
   }


@@ -38,6 +38,7 @@ enum Backend {
   ORT,  ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
   TRT,  ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
   PDINFER,  ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
+  POROS,  ///< Poros, support TorchScript format model, CPU / Nvidia GPU
   OPENVINO,  ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
   LITE,  ///< Paddle Lite, support Paddle format model, ARM CPU only
 };
@@ -47,6 +48,7 @@ enum ModelFormat {
   AUTOREC,  ///< Auto recognize the model format by model file name
   PADDLE,  ///< Model with paddlepaddle format
   ONNX,  ///< Model with ONNX format
+  TORCHSCRIPT,  ///< Model with TorchScript format
 };
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
@@ -117,6 +119,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
   /// Set TensorRT as inference backend, only support GPU
   void UseTrtBackend();

+  /// Set Poros as inference backend, support CPU/GPU
+  void UsePorosBackend();
+
   /// Set OpenVINO as inference backend, only support CPU
   void UseOpenVINOBackend();
@@ -243,6 +248,13 @@ struct FASTDEPLOY_DECL RuntimeOption {
   size_t trt_max_batch_size = 32;
   size_t trt_max_workspace_size = 1 << 30;

+  // ====== Only for Poros Backend ======
+  bool is_dynamic = false;
+  bool long_to_int = true;
+  bool use_nvidia_tf32 = false;
+  int unconst_ops_thres = -1;
+  std::string poros_file = "";
+
   std::string model_file = "";   // Path of model file
   std::string params_file = "";  // Path of parameters file, can be empty
   ModelFormat model_format = ModelFormat::AUTOREC;  // format of input model
@@ -270,6 +282,15 @@ struct FASTDEPLOY_DECL Runtime {
   bool Infer(std::vector<FDTensor>& input_tensors,
              std::vector<FDTensor>* output_tensors);

+  /** \brief Compile a TorchScript module, only for Poros backend
+   *
+   * \param[in] prewarm_tensors Prewarm data for compilation
+   * \param[in] _option Runtime option
+   * \return true if compilation succeeded, otherwise false
+   */
+  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
+               const RuntimeOption& _option);
+
   /** \brief Get number of inputs
    */
   int NumInputs() { return backend_->NumInputs(); }

python/fastdeploy/c_lib_wrap.py.in Normal file → Executable file

@@ -34,6 +34,11 @@ def is_built_with_trt() -> bool:
 def is_built_with_paddle() -> bool:
     return True if "@ENABLE_PADDLE_BACKEND@" == "ON" else False


+def is_built_with_poros() -> bool:
+    return True if "@ENABLE_POROS_BACKEND@" == "ON" else False
+
+
 def is_built_with_openvino() -> bool:
     return True if "@ENABLE_OPENVINO_BACKEND@" == "ON" else False
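After CMake expands the @...@ placeholder, this becomes a plain boolean. A usage sketch, assuming the wrapper module is importable as shown (runtime.py imports it the same way):

from fastdeploy import c_lib_wrap as C

if not C.is_built_with_poros():
    raise RuntimeError("This FastDeploy build was compiled without ENABLE_POROS_BACKEND=ON.")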


@@ -13,6 +13,7 @@
 # limitations under the License.
 from __future__ import absolute_import
 import logging
+import numpy as np
 from . import ModelFormat
 from . import c_lib_wrap as C
@@ -28,8 +29,24 @@ class Runtime:
         """
         self._runtime = C.Runtime()
+        self.runtime_option = runtime_option
         assert self._runtime.init(
-            runtime_option._option), "Initialize Runtime Failed!"
+            self.runtime_option._option), "Initialize Runtime Failed!"

+    def forward(self, *inputs):
+        """Inference with input data, only for Poros backend.
+
+        :param inputs: (numpy.ndarray) Positional input arrays, bound to input names "x0", "x1", ...
+        :return: list of numpy.ndarray
+        """
+        if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT:
+            raise Exception(
+                "The forward function is only used for Poros backend, please call the infer function instead."
+            )
+        inputs_dict = dict()
+        for i in range(len(inputs)):
+            inputs_dict["x" + str(i)] = inputs[i]
+        return self.infer(inputs_dict)
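So forward() only renames positional arrays to "x0", "x1", ... and delegates to infer(). A usage sketch, assuming a Poros runtime already compiled for a two-input TorchScript model:

import numpy as np

x0 = np.random.rand(1, 3, 224, 224).astype(np.float32)
x1 = np.random.rand(1, 10).astype(np.float32)
outputs = runtime.forward(x0, x1)  # same as runtime.infer({"x0": x0, "x1": x1})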
     def infer(self, data):
         """Inference with input data.
@@ -41,6 +58,27 @@ class Runtime:
             data, list), "The input data should be type of dict or list."
         return self._runtime.infer(data)

+    def compile(self, warm_datas):
+        """Compile the loaded TorchScript module with prewarm data, only for Poros backend.
+
+        :param warm_datas: (list of list of numpy.ndarray or torch.Tensor) The prewarm data batches
+        :return: compiled TorchScript model
+        """
+        if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT:
+            raise Exception(
+                "The compile function is only used for Poros backend, please call the infer function instead."
+            )
+        assert isinstance(warm_datas,
+                          list), "The prewarm data should be type of list."
+        for i in range(len(warm_datas)):
+            warm_data = warm_datas[i]
+            if isinstance(warm_data[0], np.ndarray):
+                warm_data = list(data for data in warm_data)
+            else:
+                # torch.Tensor inputs are converted to numpy before crossing into C++
+                warm_data = list(data.numpy() for data in warm_data)
+            warm_datas[i] = warm_data
+        return self._runtime.compile(warm_datas, self.runtime_option._option)
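Putting the pieces together, a hedged end-to-end sketch of the Poros flow (model path and shapes are placeholders; the min/opt/max group ordering is an assumption, as noted earlier):

import numpy as np
import fastdeploy as fd
from fastdeploy import ModelFormat

option = fd.RuntimeOption()
option.use_gpu(0)
option.use_poros_backend()
option.set_model_path("model_script.pt", model_format=ModelFormat.TORCHSCRIPT)
option.is_dynamic = True

runtime = fd.Runtime(option)
warm_datas = [
    [np.zeros((1, 3, 224, 224), dtype=np.float32)],   # min
    [np.zeros((8, 3, 224, 224), dtype=np.float32)],   # opt
    [np.zeros((32, 3, 224, 224), dtype=np.float32)],  # max
]
runtime.compile(warm_datas)  # builds the Poros engine from the prewarm batches
result = runtime.forward(np.zeros((4, 3, 224, 224), dtype=np.float32))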
     def num_inputs(self):
         """Get number of inputs of the loaded model.
         """
@@ -85,6 +123,65 @@ class RuntimeOption:
     def __init__(self):
         self._option = C.RuntimeOption()
+    @property
+    def is_dynamic(self):
+        """Only for Poros backend
+
+        :param value: (bool) Whether to enable dynamic shape, default False
+        """
+        return self._option.is_dynamic
+
+    @property
+    def unconst_ops_thres(self):
+        """Only for Poros backend
+
+        :param value: (int) Minimum number of subgraph OPs, default 10
+        """
+        return self._option.unconst_ops_thres
+
+    @property
+    def long_to_int(self):
+        """Only for Poros backend
+
+        :param value: (bool) Whether to convert long dtype to int dtype, default True
+        """
+        return self._option.long_to_int
+
+    @property
+    def use_nvidia_tf32(self):
+        """Only for Poros backend
+
+        :param value: (bool) Whether to use the TF32 compute mode available on NVIDIA Ampere GPUs, which can bring some performance improvement at slightly reduced precision, default False
+        """
+        return self._option.use_nvidia_tf32
+
+    @is_dynamic.setter
+    def is_dynamic(self, value):
+        assert isinstance(
+            value, bool), "The value to set `is_dynamic` must be type of bool."
+        self._option.is_dynamic = value
+
+    @unconst_ops_thres.setter
+    def unconst_ops_thres(self, value):
+        assert isinstance(
+            value,
+            int), "The value to set `unconst_ops_thres` must be type of int."
+        self._option.unconst_ops_thres = value
+
+    @long_to_int.setter
+    def long_to_int(self, value):
+        assert isinstance(
+            value,
+            bool), "The value to set `long_to_int` must be type of bool."
+        self._option.long_to_int = value
+
+    @use_nvidia_tf32.setter
+    def use_nvidia_tf32(self, value):
+        assert isinstance(
+            value,
+            bool), "The value to set `use_nvidia_tf32` must be type of bool."
+        self._option.use_nvidia_tf32 = value
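These accessors make the Poros knobs scriptable with type checks; for example:

option = fd.RuntimeOption()
option.use_poros_backend()
option.is_dynamic = True         # compile with dynamic shapes
option.long_to_int = True        # cast int64 inputs down to int32
option.use_nvidia_tf32 = False   # keep strict fp32 on Ampere GPUs
option.unconst_ops_thres = 10    # minimum op count for a fused subgraph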
     def set_model_path(self,
                        model_path,
                        params_path="",
@@ -125,6 +222,11 @@ class RuntimeOption:
         """
         return self._option.use_paddle_backend()

+    def use_poros_backend(self):
+        """Use Poros backend, support inference of TorchScript models on CPU/Nvidia GPU.
+        """
+        return self._option.use_poros_backend()
+
     def use_ort_backend(self):
         """Use ONNX Runtime backend, support inference of Paddle/ONNX models on CPU/Nvidia GPU.
         """
@@ -235,7 +337,8 @@ class RuntimeOption:
                 continue
             if hasattr(getattr(self._option, attr), "__call__"):
                 continue
-            message += "  {} : {}\t\n".format(attr, getattr(self._option, attr))
+            message += "  {} : {}\t\n".format(attr,
+                                              getattr(self._option, attr))
         message.strip("\n")
         message += ")"
         return message

python/setup.py Normal file → Executable file

@@ -53,13 +53,16 @@ setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND",
                                                      "OFF")
 setup_configs["ENABLE_PADDLE_BACKEND"] = os.getenv("ENABLE_PADDLE_BACKEND",
                                                    "OFF")
+setup_configs["ENABLE_POROS_BACKEND"] = os.getenv("ENABLE_POROS_BACKEND",
+                                                  "OFF")
 setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "OFF")
 setup_configs["ENABLE_TEXT"] = os.getenv("ENABLE_TEXT", "OFF")
 setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF")
 setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF")
 setup_configs["BUILD_ON_JETSON"] = os.getenv("BUILD_ON_JETSON", "OFF")
 setup_configs["TRT_DIRECTORY"] = os.getenv("TRT_DIRECTORY", "UNDEFINED")
-setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY", "/usr/local/cuda")
+setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY",
+                                            "/usr/local/cuda")
 setup_configs["LIBRARY_NAME"] = PACKAGE_NAME
 setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main"
 setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "")
@@ -89,7 +92,8 @@ extras_require = {}
 # Default value is set to TRUE\1 to keep the settings same as the current ones.
 # However going forward the recommended way is to set this to False\0
-USE_MSVC_STATIC_RUNTIME = bool(os.getenv('USE_MSVC_STATIC_RUNTIME', '1') == '1')
+USE_MSVC_STATIC_RUNTIME = bool(
+    os.getenv('USE_MSVC_STATIC_RUNTIME', '1') == '1')
 ONNX_NAMESPACE = os.getenv('ONNX_NAMESPACE', 'paddle2onnx')

 ################################################################################
 # Version
@@ -119,7 +123,8 @@ assert CMAKE, 'Could not find "cmake" executable!'
 @contextmanager
 def cd(path):
     if not os.path.isabs(path):
-        raise RuntimeError('Can only cd to absolute path, got: {}'.format(path))
+        raise RuntimeError('Can only cd to absolute path, got: {}'.format(
+            path))
     orig_path = os.getcwd()
     os.chdir(path)
     try: