Support Poros Backend (#188)

* Add poros backend

* Add torch lib

* Add python3 lib

* set c++ 14 for poros

* fixed bugs

* fixed grammar bugs

* fixed grammar bugs

* fixed code bugs

* fixed code bugs

* fixed CreatePorosValue bug

* Add AtType2String for Log

* fixed trt_option

* fixed poros.cmake path

* fixed grammar bug

* fixed grammar bug

* fixed ambiguous reference

* fixed ambiguous reference

* fixed reference error

* fixed include files

* rm ENABLE_TRT_BACKEND in poros

* update CMakeLists.txt

* fixed CMakeLists.txt

* Add libtorch.so in CMakeLists.txt

* Fixed CMakeLists.txt

* Fixed CMakeLists.txt

* Fixed copy bug

* Fixed copy bug

* Fixed copy bug

* Fixed Cmake

* Fixed Cmake

* debug

* debug

* debug

* debug

* debug

* debug

* debug utils

* debug utils

* copy to cpu

* rm log info

* test share mem

* test share mem

* test share mem

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* test multi outputs

* time cost

* time cost

* fixed bug

* time collect

* mem copy

* mem copy

* rm time log

* rm share mem

* fixed multi inputs bug

* add set_input_dtypes func

* add SetInputDtypes

* fixed bug

* fixed bug

* fixed prewarm data order

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* fixed bug

* Add compile func

* Add compile func

* Add compile func

* Add is_dynamic option

* Add is_dynamic option

* Add is_dynamic option

* Add is_dynamic option

* rm infer log

* add cuda11.6 poros lib

* fixed bug

* fixed bug

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* fixed multi outputs

* rm logs

* test

* test

* test

* add test log

* add test log

* add test log

* add test log

* support cpu

* support cpu

* support cpu

* support cpu

* support member variable definition

* rm useless log

* fixed name

* resolve conflict

* resolve conflict

* resolve conflict

* fixed cmake

* add GetInputInfos&GetOutputInfos

* add GetInputInfos&GetOutputInfos

* fixed bug

* fixed runtime.py

* add compile func

* add np

* deal with comments

* rm to_inter func

* add property
WJJ1995
2022-10-17 15:28:12 +08:00
committed by GitHub
parent c8db2dd1ef
commit f5c94e5471
19 changed files with 1333 additions and 12 deletions

CMakeLists.txt Normal file → Executable file

@@ -51,10 +51,11 @@ endif()
############################# Basic Options for FastDeploy ################################
option(ENABLE_PADDLE_FRONTEND "Whether to enable PaddlePaddle frontend to support load paddle model in fastdeploy." ON)
option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu/poros-gpu" OFF)
option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF)
option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF)
option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF)
option(ENABLE_POROS_BACKEND "Whether to enable poros backend." OFF)
option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF)
option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
option(ENABLE_VISION "Whether to enable vision models usage." OFF)
@@ -108,6 +109,9 @@ if(WIN32)
if(ENABLE_PADDLE_BACKEND)
message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON doesn't support on non 64-bit system now.")
endif()
if(ENABLE_POROS_BACKEND)
message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.")
endif()
if(ENABLE_VISION)
message(FATAL_ERROR "-DENABLE_VISION=ON doesn't support on non 64-bit system now.")
endif()
@@ -165,13 +169,14 @@ file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastde
file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc)
file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc)
file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/paddle/*.cc)
file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc)
file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc)
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS})
set(DEPEND_LIBS "")
@@ -228,6 +233,62 @@ if(ENABLE_OPENVINO_BACKEND)
include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake)
endif()
if(ENABLE_POROS_BACKEND)
set(CMAKE_CXX_STANDARD 14)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
add_definitions(-DENABLE_POROS_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_POROS_SRCS})
include(${PROJECT_SOURCE_DIR}/cmake/poros.cmake)
list(APPEND DEPEND_LIBS external_poros)
set(PYTHON_MINIMUM_VERSION 3.6)
set(PYTORCH_MINIMUM_VERSION 1.9)
set(TENSORRT_MINIMUM_VERSION 8.0)
# find python3
find_package(Python3 ${PYTHON_MINIMUM_VERSION} REQUIRED COMPONENTS Interpreter Development)
message(STATUS "Found Python: ${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}.${Python3_VERSION_PATCH}")
if (NOT Python3_SITELIB)
message(FATAL_ERROR "site-packages not found. ")
else ()
message(STATUS "site-packages: ${Python3_SITELIB}")
endif ()
# find pytorch
find_package(Torch ${PYTORCH_MINIMUM_VERSION} REQUIRED HINTS ${Python3_SITELIB})
include_directories(${TORCH_INCLUDE_DIRS})
include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/common)
list(APPEND DEPEND_LIBS ${TORCH_LIBRARY})
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch")
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch")
endif()
if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib")
file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib")
endif()
find_package(Python COMPONENTS Interpreter Development REQUIRED)
message(STATUS "Copying ${TORCH_INSTALL_PREFIX}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib ...")
execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TORCH_INSTALL_PREFIX}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib)
# find trt
if(NOT WITH_GPU)
message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must set -DWITH_GPU=ON, but now it's OFF")
endif()
if(NOT TRT_DIRECTORY)
message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must define -DTRT_DIRECTORY, e.g -DTRT_DIRECTORY=/Downloads/TensorRT-8.4")
endif()
include_directories(${TRT_DIRECTORY}/include)
find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB})
if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")
endif()
if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
endif()
find_package(Python COMPONENTS Interpreter Development REQUIRED)
message(STATUS "Copying ${TRT_DIRECTORY}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib ...")
execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib)
endif()
if(WITH_GPU)
if(APPLE)
message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.")
@@ -292,7 +353,7 @@ if(ENABLE_TRT_BACKEND)
OUTPUT_VARIABLE curr_out
ERROR_VARIABLE curr_out)
if(ret EQUAL "1")
message(FATAL_ERROR "Failed to patchelf tensorrt libraries.")
endif()
message(STATUS "result:${result} out:${curr_out}")
endif()

FastDeploy.cmake.in Normal file → Executable file

@@ -5,6 +5,8 @@ set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@)
set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
set(ENABLE_POROS_BACKEND @ENABLE_POROS_BACKEND@)
set(POROS_VERSION @POROS_VERSION@)
set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
set(ENABLE_VISION @ENABLE_VISION@)
@@ -106,6 +108,12 @@ if(ENABLE_LITE_BACKEND)
endif()
endif()
if(ENABLE_POROS_BACKEND)
find_library(POROS_LIB poros ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/lib NO_DEFAULT_PATH)
list(APPEND FASTDEPLOY_LIBS ${POROS_LIB})
list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/include)
endif()
if(WITH_GPU)
if (NOT CUDA_DIRECTORY)
set(CUDA_DIRECTORY "/usr/local/cuda")
@@ -212,12 +220,16 @@ message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
message(STATUS " WITH_GPU : ${WITH_GPU}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
message(STATUS " ENABLE_LITE_BACKEND : ${ENABLE_LITE_BACKEND}")
if(ENABLE_PADDLE_BACKEND)
message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}")
endif()
if(ENABLE_POROS_BACKEND)
message(STATUS " Poros version : ${POROS_VERSION}")
endif()
if(ENABLE_OPENVINO_BACKEND)
message(STATUS " OpenVINO version : ${OPENVINO_VERSION}")
endif()

cmake/poros.cmake Executable file

@@ -0,0 +1,76 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include(ExternalProject)
set(POROS_PROJECT "extern_poros")
set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros)
set(POROS_SOURCE_DIR
${THIRD_PARTY_PATH}/poros/src/${POROS_PROJECT})
set(POROS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/poros)
set(POROS_INC_DIR
"${POROS_INSTALL_DIR}/include"
CACHE PATH "poros include directory." FORCE)
set(POROS_LIB_DIR
"${POROS_INSTALL_DIR}/lib/"
CACHE PATH "poros lib directory." FORCE)
set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}"
"${POROS_LIB_DIR}")
include_directories(${POROS_INC_DIR})
if(WIN32)
message(FATAL_ERROR "Poros Backend doesn't support Windows now.")
elseif(APPLE)
message(FATAL_ERROR "Poros Backend doesn't support Mac OSX now.")
else()
set(POROS_COMPILE_LIB
"${POROS_INSTALL_DIR}/lib/libporos.so"
CACHE FILEPATH "poros compile library." FORCE)
endif(WIN32)
set(POROS_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
set(POROS_VERSION "0.1.0")
if(WIN32)
message(FATAL_ERROR "Poros Backend doesn't support Windows now.")
elseif(APPLE)
message(FATAL_ERROR "Poros Backend doesn't support Mac OSX now.")
else()
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.")
set(POROS_FILE "poros-linux-aarch64-${POROS_VERSION}.tgz")
else()
set(POROS_FILE "poros-linux-x64-${POROS_VERSION}.tgz")
if(WITH_GPU)
set(POROS_FILE "poros-linux-x64-gpu-${POROS_VERSION}.tgz")
endif()
endif()
endif()
set(POROS_URL "${POROS_URL_BASE}${POROS_FILE}")
ExternalProject_Add(
${POROS_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${POROS_URL}
PREFIX ${POROS_PREFIX_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${POROS_SOURCE_DIR} ${POROS_INSTALL_DIR}
BUILD_BYPRODUCTS ${POROS_COMPILE_LIB})
add_library(external_poros STATIC IMPORTED GLOBAL)
set_property(TARGET external_poros PROPERTY IMPORTED_LOCATION
${POROS_COMPILE_LIB})
add_dependencies(external_poros ${POROS_PROJECT})

cmake/summary.cmake Normal file → Executable file

@@ -32,6 +32,7 @@ function(fastdeploy_summary)
message(STATUS " Paddle2ONNX version : ${PADDLE2ONNX_VERSION}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
if(ENABLE_ORT_BACKEND)
@@ -40,6 +41,9 @@ function(fastdeploy_summary)
if(ENABLE_PADDLE_BACKEND)
message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}")
endif()
if(ENABLE_POROS_BACKEND)
message(STATUS " Poros version : ${POROS_VERSION}")
endif()
if(ENABLE_OPENVINO_BACKEND)
message(STATUS " OpenVINO version : ${OPENVINO_VERSION}")
endif()

docs/api_docs/cpp/main_page.md Normal file → Executable file

@@ -11,6 +11,7 @@ Currently, FastDeploy supported backends listed as below,
| ONNX Runtime | CPU/Nvidia GPU | Paddle/ONNX | Windows(x64)/Linux(x64/aarch64)/Mac(x86/arm64) |
| TensorRT | Nvidia GPU | Paddle/ONNX | Windows(x64)/Linux(x64)/Jetson |
| OpenVINO | CPU | Paddle/ONNX | Windows(x64)/Linux(x64)/Mac(x86) |
| Poros | CPU/Nvidia GPU | TorchScript | Linux(x64) |
### Example code
- [Python examples](./)


@@ -0,0 +1,167 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <algorithm>
#include <unordered_map>
#include <set>
#include "torch/script.h"
#include "iengine.h"
#include "poros_module.h"
namespace baidu {
namespace mirana {
namespace poros {
/**
* @brief compile graph
*
* @param [in] module : original module
* @param [in] prewarm_datas : prewarm data
* @param [in] options : inference options
* @return PorosModule
* @retval !nullptr => succeed nullptr => failed
**/
std::unique_ptr<PorosModule> Compile(const torch::jit::Module& module,
const std::vector<std::vector<c10::IValue> >& prewarm_datas,
const PorosOptions& options);
class Compiler {
public:
typedef std::unordered_map<const torch::jit::Node*, IEngine*> engine_map_t;
typedef std::vector<std::vector<c10::IValue> > ivalue_vec_t;
Compiler() : _origin_module(NULL) {}
~Compiler();
/**
* @brief initial Compiler
*
* @param [in] options : poros options
* @return int
* @retval 0 => succeed <0 => failed
**/
int init(const PorosOptions& options);
/**
* @brief compile whole graph
*
* @param [in] origin_module
* @param [in] prewarm_datas : ivalue_vec_t, vector of IValue
* @param [out] optimized_module : optimized graph
* @return int
* @retval 0 => succeed <0 => failed
**/
int compile(const torch::jit::Module& origin_module,
const ivalue_vec_t& prewarm_datas,
torch::jit::Module* optimized_module);
private:
/**
* @brief preprocess this calculation graph
*
* @param [in] prewarm_datas : ivalue_vec_t, vector of IValue
* @param [out] graph : preprocessed graph
* @return int
* @retval 0 => succeed <0 => failed
**/
int preprocess_graph(const ivalue_vec_t& prewarm_datas, std::shared_ptr<torch::jit::Graph>& graph);
/**
* @brief segment this calculation graph
*
* @param [in/out] graph
* @return int
* @retval 0 => succeed <0 => failed
**/
int segment_graph(std::shared_ptr<torch::jit::Graph>& graph);
// Split the subgraph (block).
// The divided subgraph is associated with the block.
int segment_block(torch::jit::Block& block, IEngine* engine, int current_depth);
// Subgraph optimization
/**
* @brief Subgraph optimization
*
* @param [in] prewarm_datas : ivalue_vec_t, vector of IValue
* @param [in] opt_graph : graph to be optimized
* @param [out] optimized_module : optimized graph
* @return int
* @retval 0 => succeed <0 => failed
**/
int optimize_subgraph(const ivalue_vec_t& prewarm_datas,
const std::shared_ptr<torch::jit::Graph>& opt_graph,
torch::jit::Module* optimized_module);
// Subgraph optimization(block)
int optimize_subblock(torch::jit::Block* block,
torch::jit::Module* optimized_module);
/**
* @brief Compile the subgraph into a new graph based on the engine
*
* @param [in] engine : The engine used by the subgraph
* @param [in] subgraph_node : Subgraph node
* @return [out] module : Transformed model
* @retval 0 => succeed <0 => failed
**/
int transform(IEngine* engine, torch::jit::Node& subgraph_node,
torch::jit::Module& module);
/**
* @brief Select engine based on subgraph and options
*
* @param [in] node : Jit Node
* @return IEngine*
* @retval !nullptr => succeed nullptr => failed
**/
IEngine* select_engine(const torch::jit::Node* n);
/**
* @brief destroy
*
* @return void
**/
void close();
private:
int _max_segment_depth{5}; // Maximum subgraph segmentation depth
ivalue_vec_t _prewarm_datas; // Prewarm datas
PorosOptions _options;
engine_map_t _engine_map; // The engine used to record the subgraph
const torch::jit::Module* _origin_module; // Origin_module
std::atomic<int> _engine_index = {0}; // Record engine index
};
/**
* @brief compile graph, internal use
*
* @param [in] module : Origin module
* @param [in] prewarm_datas : prewarm data
* @param [in] options : Inference options
* @return optimized_module
* @retval !nullptr => succeed nullptr => failed
**/
std::unique_ptr<torch::jit::Module> CompileGraph(const torch::jit::Module& module,
const std::vector<std::vector<c10::IValue> >& prewarm_datas,
const PorosOptions& options);
} // namespace poros
} // namespace mirana
} // namespace baidu
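
For orientation, here is a minimal sketch of how this compile entry point is driven. It is illustrative only: the model path and input shape are assumptions, and the flow simply mirrors what PorosBackend::Compile does further below.

// Sketch: compiling a TorchScript module with Poros (illustrative).
#include <memory>
#include <vector>
#include "torch/script.h"
#include "fastdeploy/backends/poros/poros_backend.h"  // pulls in common/compile.h

int main() {
  // Load and prepare the original TorchScript module ("model.pt" is assumed).
  torch::jit::Module mod = torch::jit::load("model.pt");
  mod.eval();
  mod.to(at::kCUDA);

  // One group of prewarm inputs; Poros traces shapes/dtypes from these.
  std::vector<std::vector<c10::IValue>> prewarm_datas;
  prewarm_datas.push_back({at::randn({1, 3, 224, 224}, {at::kCUDA})});

  baidu::mirana::poros::PorosOptions options;
  options.device = baidu::mirana::poros::Device::GPU;

  // nullptr signals a failed compilation (see @retval above).
  auto poros_mod = baidu::mirana::poros::Compile(mod, prewarm_datas, options);
  if (poros_mod == nullptr) return -1;

  // PorosModule derives from torch::jit::Module, so forward() works as usual.
  auto out = poros_mod->forward({at::randn({1, 3, 224, 224}, {at::kCUDA})});
  return 0;
}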


@@ -0,0 +1,84 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
//from pytorch
#include "torch/script.h"
#include "torch/csrc/jit/ir/ir.h"
#include "ATen/core/interned_strings.h"
#include "plugin_create.h"
namespace baidu {
namespace mirana {
namespace poros {
/**
* the base engine class
* every registered engine should inherit from this IEngine
**/
struct PorosGraph {
torch::jit::Graph* graph = NULL;
torch::jit::Node* node = NULL;
};
typedef uint64_t EngineID;
class IEngine : public IPlugin, public torch::CustomClassHolder{
public:
virtual ~IEngine() {}
/**
* @brief init; the engine must be fully initialized when init() returns success
* @return int
* @retval 0 => success, <0 => fail
**/
virtual int init() = 0;
/**
* @brief During compilation, the subgraph is converted into the corresponding engine's graph representation and stored inside the engine, so that excute_engine can run it at inference time
* @param [in] sub_graph : subgraph
* @return [res]int
* @retval 0 => success, <0 => fail
**/
virtual int transform(const PorosGraph& sub_graph) = 0;
/**
* @brief Execution logic for the subgraph at runtime
* @param [in] inputs : input tensor
* @return [res] output tensor
**/
virtual std::vector<at::Tensor> excute_engine(const std::vector<at::Tensor>& inputs) = 0;
virtual void register_module_attribute(const std::string& name, torch::jit::Module& module) = 0;
// Identifier of the engine
virtual const std::string who_am_i() = 0;
// Whether the node is supported by the current engine
bool is_node_supported(const torch::jit::Node* node);
public:
std::pair<uint64_t, uint64_t> _num_io; // Number of input/output parameters
EngineID _id;
};
} // namespace poros
} // namespace mirana
} // namespace baidu


@@ -0,0 +1,65 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <unordered_map>
#include <string>
namespace baidu {
namespace mirana {
namespace poros {
class IPlugin {
public:
virtual ~IPlugin() {}
virtual const std::string who_am_i() = 0;
};
typedef IPlugin* (*plugin_creator_t)();
typedef std::unordered_map<std::string, plugin_creator_t> plugin_creator_map_t;
IPlugin* create_plugin(const std::string& plugin_name);
IPlugin* create_plugin(const std::string& plugin_name, const plugin_creator_map_t& plugin_creator_map);
void create_all_plugins(const plugin_creator_map_t& plugin_creator_map,
std::unordered_map<std::string, IPlugin*>& plugin_m);
//void create_all_plugins(std::unordered_map<std::string, IPlugin*>& plugin_m);
template <typename PluginType>
IPlugin* default_plugin_creator() {
return new (std::nothrow)PluginType;
}
void register_plugin_creator(const std::string& plugin_name, plugin_creator_t creator);
void register_plugin_creator(const std::string& plugin_name,
plugin_creator_t creator, plugin_creator_map_t& plugin_creator_map);
template <typename PluginType>
void register_plugin_class(const std::string& plugin_name) {
return register_plugin_creator(plugin_name, default_plugin_creator<PluginType>);
}
// This version is recommended
template <typename PluginType>
void register_plugin_class(const std::string& plugin_name, plugin_creator_map_t& plugin_creator_map) {
return register_plugin_creator(plugin_name, default_plugin_creator<PluginType>, plugin_creator_map);
}
}//poros
}//mirana
}//baidu
/* vim: set ts=4 sw=4 sts=4 tw=100 */
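A short sketch of how this registry is intended to be used; DummyEngine here is an invented example class, not part of the PR.

// Sketch: registering a plugin creator and instantiating it by name
// (DummyEngine and the local creator map are illustrative).
#include <string>
#include <unordered_map>
#include "plugin_create.h"

namespace poros = baidu::mirana::poros;

class DummyEngine : public poros::IPlugin {
 public:
  const std::string who_am_i() override { return "DummyEngine"; }
};

int main() {
  poros::plugin_creator_map_t creator_map;
  // The map-based overload is the one the header marks as recommended.
  poros::register_plugin_class<DummyEngine>("DummyEngine", creator_map);

  poros::IPlugin* plugin = poros::create_plugin("DummyEngine", creator_map);
  bool ok = plugin != nullptr && plugin->who_am_i() == "DummyEngine";
  delete plugin;
  return ok ? 0 : 1;
}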


@@ -0,0 +1,67 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "torch/script.h"
#include "torch/csrc/jit/jit_log.h"
// #include "ATen/Context.h"
namespace baidu {
namespace mirana {
namespace poros {
enum Device : int8_t {
GPU = 0,
CPU,
XPU,
UNKNOW
};
struct PorosOptions {
Device device = GPU;
bool debug = false;
bool use_fp16 = false;
bool is_dynamic = false;
bool long_to_int = true;
uint64_t max_workspace_size = 1ULL << 30;
int32_t device_id = -1;
int32_t unconst_ops_thres = -1;
bool use_nvidia_tf32 = false;
};
class PorosModule : public torch::jit::Module {
public:
PorosModule(torch::jit::Module module) : torch::jit::Module(module) {
}
~PorosModule() = default;
void to_device(Device device){
_options.device = device;
}
//c10::IValue forward(std::vector<c10::IValue> inputs);
//void save(const std::string& filename);
public:
PorosOptions _options;
};
// load a module previously saved via PorosModule::save
std::unique_ptr<PorosModule> Load(const std::string& filename, const PorosOptions& options);
} // namespace poros
} // namespace mirana
} // namespace baidu
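
A sketch of the corresponding load path for a previously saved module; the "model.poros" filename is an assumption, and InitFromPoros in poros_backend.cc below follows the same pattern.

// Sketch: loading a module saved via PorosModule::save (illustrative path).
#include <memory>
#include "fastdeploy/backends/poros/common/poros_module.h"

int main() {
  baidu::mirana::poros::PorosOptions options;
  options.device = baidu::mirana::poros::Device::GPU;
  options.device_id = 0;

  std::unique_ptr<baidu::mirana::poros::PorosModule> mod =
      baidu::mirana::poros::Load("model.poros", options);
  return mod == nullptr ? 1 : 0;  // nullptr => load failed
}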


@@ -0,0 +1,240 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/poros/poros_backend.h"
#include <sys/time.h>
namespace fastdeploy {
TensorInfo PorosBackend::GetInputInfo(int index) {
// eager mode can't obtain input information before infer
TensorInfo info_input;
return info_input;
}
TensorInfo PorosBackend::GetOutputInfo(int index) {
// eager mode can't obtain output information before infer
TensorInfo info_output;
return info_output;
}
std::vector<TensorInfo> PorosBackend::GetInputInfos() {
// eager mode can't obtain inputs information before infer
std::vector<TensorInfo> info_inputs;
return info_inputs;
}
std::vector<TensorInfo> PorosBackend::GetOutputInfos() {
// eager mode can't obtain outputs information before infer
std::vector<TensorInfo> info_outputs;
return info_outputs;
}
void PorosBackend::BuildOption(const PorosBackendOption& option) {
_options.device = option.use_gpu ? baidu::mirana::poros::Device::GPU
: baidu::mirana::poros::Device::CPU;
_options.long_to_int = option.long_to_int;
_options.use_nvidia_tf32 = option.use_nvidia_tf32;
_options.device_id = option.gpu_id;
_options.unconst_ops_thres = option.unconst_ops_thres;
_options.is_dynamic = option.is_dynamic;
_options.max_workspace_size = option.max_workspace_size;
_options.use_fp16 = option.enable_fp16;
return;
}
bool PorosBackend::Compile(const std::string& model_file,
std::vector<std::vector<FDTensor>>& prewarm_tensors,
const PorosBackendOption& option) {
if (initialized_) {
FDERROR << "PorosBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
BuildOption(option);
torch::jit::Module mod;
mod = torch::jit::load(model_file);
mod.eval();
if (option.use_gpu) {
mod.to(at::kCUDA);
} else {
mod.to(at::kCPU);
}
// get inputs_nums and outputs_nums
auto graph = mod.get_method("forward").graph();
auto inputs = graph->inputs();
// remove self node
_numinputs = inputs.size() - 1;
// FDTensor to at::Tensor
std::vector<std::vector<c10::IValue>> prewarm_datas;
bool is_backend_cuda = option.use_gpu ? true : false;
for (size_t i = 0; i < prewarm_tensors.size(); ++i) {
std::vector<c10::IValue> prewarm_data;
for (size_t j = 0; j < prewarm_tensors[i].size(); ++j) {
auto tensor = CreatePorosValue(prewarm_tensors[i][j], is_backend_cuda);
prewarm_data.push_back(tensor);
}
prewarm_datas.push_back(prewarm_data);
}
// get outputs nums
auto temp_result = mod.forward(prewarm_datas[0]);
size_t outputs_nums = 0;
if (temp_result.isTensor()) {
outputs_nums += 1;
} else if (temp_result.isTuple()) {
auto temp_result_tuple = temp_result.toTuple();
for (size_t i = 0; i < temp_result_tuple->elements().size(); ++i) {
auto poros_tensor = temp_result_tuple->elements()[i];
if (poros_tensor.isTensor()) {
outputs_nums += 1;
} else if (poros_tensor.isList()) {
auto poros_tensor_list = poros_tensor.toList();
outputs_nums += poros_tensor_list.size();
} else if (poros_tensor.isTuple()) {
auto poros_tensor_tuple = poros_tensor.toTuple();
outputs_nums += poros_tensor_tuple->elements().size();
} else {
continue;
}
}
}
_numoutputs = outputs_nums;
_poros_module = baidu::mirana::poros::Compile(mod, prewarm_datas, _options);
if (_poros_module == nullptr) {
FDERROR << "PorosBackend initlize Failed, try initialize again."
<< std::endl;
return false;
}
initialized_ = true;
return true;
}
bool PorosBackend::InitFromTorchScript(const std::string& model_file,
const PorosBackendOption& option) {
if (initialized_) {
FDERROR << "PorosBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
if (option.poros_file != "") {
std::ifstream fin(option.poros_file, std::ios::binary | std::ios::in);
if (fin) {
FDINFO << "Detect compiled Poros file in " << option.poros_file
<< ", will load it directly." << std::endl;
fin.close();
return InitFromPoros(option.poros_file, option);
}
}
BuildOption(option);
torch::jit::Module mod;
mod = torch::jit::load(model_file);
mod.eval();
if (option.use_gpu) {
mod.to(at::kCUDA);
} else {
mod.to(at::kCPU);
}
// get inputs_nums and outputs_nums
auto graph = mod.get_method("forward").graph();
auto inputs = graph->inputs();
// remove self node
_numinputs = inputs.size() - 1;
auto outputs = graph->outputs();
_numoutputs = outputs.size();
_poros_module = baidu::mirana::poros::Compile(mod, _prewarm_datas, _options);
if (_poros_module == nullptr) {
FDERROR << "PorosBackend initlize Failed, try initialize again."
<< std::endl;
return false;
}
initialized_ = true;
return true;
}
bool PorosBackend::InitFromPoros(const std::string& model_file,
const PorosBackendOption& option) {
if (initialized_) {
FDERROR << "PorosBackend is already initlized, cannot initialize again."
<< std::endl;
return false;
}
BuildOption(option);
_poros_module = baidu::mirana::poros::Load(model_file, _options);
if (_poros_module == nullptr) {
FDERROR << "PorosBackend initlize Failed, try initialize again."
<< std::endl;
return false;
}
// get inputs_nums and outputs_nums
auto graph = _poros_module->get_method("forward").graph();
auto inputs = graph->inputs();
// remove self node
_numinputs = inputs.size() - 1;
auto outputs = graph->outputs();
_numoutputs = outputs.size();
initialized_ = true;
return true;
}
bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
// Convert FD Tensor to PyTorch Tensor
std::vector<torch::jit::IValue> poros_inputs;
bool is_backend_cuda =
_options.device == baidu::mirana::poros::Device::GPU ? true : false;
for (size_t i = 0; i < inputs.size(); ++i) {
poros_inputs.push_back(CreatePorosValue(inputs[i], is_backend_cuda));
}
// Infer
auto poros_outputs = _poros_module->forward(poros_inputs);
// Convert PyTorch Tensor to FD Tensor
if (poros_outputs.isTensor()) {
CopyTensorToCpu(poros_outputs.toTensor(), &((*outputs)[0]),
is_backend_cuda);
} else if (poros_outputs.isTuple()) {
// deal with multi outputs
auto poros_outputs_tuple = poros_outputs.toTuple();
size_t index = 0;
for (size_t i = 0; i < poros_outputs_tuple->elements().size(); ++i) {
auto poros_tensor = poros_outputs_tuple->elements()[i];
if (poros_tensor.isTensor()) {
CopyTensorToCpu(poros_tensor.toTensor(), &((*outputs)[index]),
is_backend_cuda);
index += 1;
} else if (poros_tensor.isList()) {
auto poros_tensor_list = poros_tensor.toList();
for (const auto list_idx : c10::irange(0, poros_tensor_list.size())) {
const auto& elt = poros_tensor_list.get(list_idx);
CopyTensorToCpu(elt.toTensor(), &((*outputs)[index]),
is_backend_cuda);
index += 1;
}
} else if (poros_tensor.isTuple()) {
auto poros_tensor_tuple = poros_tensor.toTuple();
for (size_t j = 0; j < poros_tensor_tuple->elements().size(); ++j) {
CopyTensorToCpu(poros_tensor_tuple->elements()[j].toTensor(),
&((*outputs)[index]), is_backend_cuda);
index += 1;
}
} else {
continue;
}
}
} else {
FDERROR << "Convert to FDTensor Failed!!!!!" << std::endl;
}
return true;
}
} // namespace fastdeploy


@@ -0,0 +1,107 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/backends/poros/common/compile.h"
#include "fastdeploy/backends/poros/common/poros_module.h"
namespace fastdeploy {
struct PorosBackendOption {
#ifdef WITH_GPU
bool use_gpu = true;
#else
bool use_gpu = false;
#endif
int gpu_id = 0;
bool long_to_int = true;
// TF32 mode on A10 trades some numerical precision for speed; it can bring
// a performance improvement, but results may differ slightly from FP32
bool use_nvidia_tf32 = false;
// Threshold for the number of non-const ops
int32_t unconst_ops_thres = -1;
std::string poros_file = "";
std::vector<FDDataType> prewarm_datatypes = {FDDataType::FP32};
// TRT options
bool enable_fp16 = false;
bool enable_int8 = false;
bool is_dynamic = false;
size_t max_batch_size = 32;
size_t max_workspace_size = 1 << 30;
};
// Convert data type from fastdeploy to poros
at::ScalarType GetPorosDtype(const FDDataType& fd_dtype);
// Convert data type from poros to fastdeploy
FDDataType GetFdDtype(const at::ScalarType& dtype);
// at::ScalarType to std::string for FDERROR
std::string AtType2String(const at::ScalarType& dtype);
// Create at::Tensor
// is_backend_cuda specifies whether Poros uses the GPU device;
// when is_backend_cuda = true, tensor.device is expected to be Device::GPU
at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda = false);
// Copy memory data from at::Tensor to fastdeploy::FDTensor
void CopyTensorToCpu(const at::Tensor& tensor, FDTensor* fd_tensor,
bool is_backend_cuda = false);
class PorosBackend : public BaseBackend {
public:
PorosBackend() {}
virtual ~PorosBackend() = default;
void BuildOption(const PorosBackendOption& option);
bool InitFromTorchScript(
const std::string& model_file,
const PorosBackendOption& option = PorosBackendOption());
bool InitFromPoros(const std::string& model_file,
const PorosBackendOption& option = PorosBackendOption());
bool Compile(const std::string& model_file,
std::vector<std::vector<FDTensor>>& prewarm_tensors,
const PorosBackendOption& option = PorosBackendOption());
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
int NumInputs() const { return _numinputs; }
int NumOutputs() const { return _numoutputs; }
TensorInfo GetInputInfo(int index) override;
TensorInfo GetOutputInfo(int index) override;
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;
private:
baidu::mirana::poros::PorosOptions _options;
std::unique_ptr<baidu::mirana::poros::PorosModule> _poros_module;
std::vector<std::vector<c10::IValue>> _prewarm_datas;
int _numinputs = 1;
int _numoutputs = 1;
};
} // namespace fastdeploy
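
Putting the pieces together, a hedged sketch of driving PorosBackend directly; the model path and shapes are assumptions, and Runtime::Compile further below is the intended entry point.

// Sketch: using PorosBackend directly (illustrative; normally reached
// through Runtime::Compile).
#include <vector>
#include "fastdeploy/backends/poros/poros_backend.h"

int main() {
  fastdeploy::PorosBackendOption option;
  option.use_gpu = true;

  // One group of prewarm tensors matching the model's input signature.
  std::vector<std::vector<fastdeploy::FDTensor>> prewarm(1);
  prewarm[0].resize(1);
  prewarm[0][0].Resize({1, 3, 224, 224}, fastdeploy::FDDataType::FP32);

  fastdeploy::PorosBackend backend;
  if (!backend.Compile("model.pt", prewarm, option)) return 1;

  std::vector<fastdeploy::FDTensor> inputs(1);
  std::vector<fastdeploy::FDTensor> outputs(backend.NumOutputs());
  inputs[0].Resize({1, 3, 224, 224}, fastdeploy::FDDataType::FP32);
  return backend.Infer(inputs, &outputs) ? 0 : 1;
}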


@@ -0,0 +1,186 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/poros/poros_backend.h"
#ifdef WITH_GPU
#include <cuda_runtime_api.h>
#endif
namespace fastdeploy {
std::string AtType2String(const at::ScalarType& dtype) {
std::string out;
switch (dtype) {
case at::kByte:
out = "at::kByte";
break;
case at::kChar:
out = "at::kChar";
break;
case at::kShort:
out = "at::kShort";
break;
case at::kInt:
out = "at::kInt";
break;
case at::kLong:
out = "at::kLong";
break;
case at::kHalf:
out = "at::kHalf";
break;
case at::kFloat:
out = "at::kFloat";
break;
case at::kDouble:
out = "at::kDouble";
break;
default:
out = "at::UNKNOWN";
}
return out;
}
at::ScalarType GetPorosDtype(const FDDataType& fd_dtype) {
if (fd_dtype == FDDataType::FP32) {
return at::kFloat;
} else if (fd_dtype == FDDataType::FP64) {
return at::kDouble;
} else if (fd_dtype == FDDataType::INT32) {
return at::kInt;
} else if (fd_dtype == FDDataType::INT64) {
return at::kLong;
}
FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "."
<< std::endl;
return at::kFloat;
}
FDDataType GetFdDtype(const at::ScalarType& poros_dtype) {
if (poros_dtype == at::kFloat) {
return FDDataType::FP32;
} else if (poros_dtype == at::kDouble) {
return FDDataType::FP64;
} else if (poros_dtype == at::kInt) {
return FDDataType::INT32;
} else if (poros_dtype == at::kLong) {
return FDDataType::INT64;
}
FDERROR << "Unrecognized poros data type:" << AtType2String(poros_dtype)
<< "." << std::endl;
return FDDataType::FP32;
}
at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda) {
FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU,
"Only support tensor which device is CPU or GPU for PorosBackend.");
auto data_type = GetPorosDtype(tensor.dtype);
size_t numel = tensor.Numel();
at::Tensor poros_value;
if (is_backend_cuda) {
poros_value = std::move(
at::empty(tensor.shape, {at::kCUDA}).to(data_type).contiguous());
} else {
poros_value = std::move(
at::empty(tensor.shape, {at::kCPU}).to(data_type).contiguous());
}
if (data_type == at::kFloat) {
if (is_backend_cuda) {
cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(float), cudaMemcpyHostToDevice);
} else {
memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(float));
}
} else if (data_type == at::kInt) {
if (is_backend_cuda) {
cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(int32_t), cudaMemcpyHostToDevice);
} else {
memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(int32_t));
}
} else if (data_type == at::kLong) {
if (is_backend_cuda) {
cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(int64_t), cudaMemcpyHostToDevice);
} else {
memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(int64_t));
}
} else if (data_type == at::kDouble) {
if (is_backend_cuda) {
cudaMemcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(double), cudaMemcpyHostToDevice);
} else {
memcpy(poros_value.data_ptr(), static_cast<void*>(tensor.Data()),
numel * sizeof(double));
}
} else {
FDASSERT(false,
"Unrecognized data type while calling "
"PorosBackend::CreatePorosValue().");
}
return poros_value;
}
void CopyTensorToCpu(const at::Tensor& tensor, FDTensor* fd_tensor,
bool is_backend_cuda) {
const auto data_type = tensor.scalar_type();
std::vector<int64_t> shape;
auto sizes = tensor.sizes();
for (size_t i = 0; i < sizes.size(); i++) {
shape.push_back(sizes[i]);
}
auto fd_dtype = GetFdDtype(data_type);
fd_tensor->Resize(shape, fd_dtype);
size_t numel = tensor.numel();
// at::Tensor -> FDTensor
if (data_type == at::kFloat) {
if (is_backend_cuda) {
cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float),
cudaMemcpyDeviceToHost);
} else {
memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float));
}
return;
} else if (data_type == at::kInt) {
if (is_backend_cuda) {
cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t),
cudaMemcpyDeviceToHost);
} else {
memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t));
}
return;
} else if (data_type == at::kLong) {
if (is_backend_cuda) {
cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t),
cudaMemcpyDeviceToHost);
} else {
memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t));
}
return;
} else if (data_type == at::kDouble) {
if (is_backend_cuda) {
cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double),
cudaMemcpyDeviceToHost);
} else {
memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double));
}
return;
}
}
} // namespace fastdeploy

fastdeploy/core/config.h.in Normal file → Executable file

@@ -29,6 +29,10 @@
#cmakedefine ENABLE_PADDLE_BACKEND
#endif
#ifndef ENABLE_POROS_BACKEND
#cmakedefine ENABLE_POROS_BACKEND
#endif
#ifndef ENABLE_OPENVINO_BACKEND
#cmakedefine ENABLE_OPENVINO_BACKEND
#endif


@@ -24,6 +24,7 @@ void BindRuntime(pybind11::module& m) {
.def("use_cpu", &RuntimeOption::UseCpu)
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
.def("use_poros_backend", &RuntimeOption::UsePorosBackend)
.def("use_ort_backend", &RuntimeOption::UseOrtBackend)
.def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel)
.def("use_trt_backend", &RuntimeOption::UseTrtBackend)
@@ -62,7 +63,12 @@ void BindRuntime(pybind11::module& m) {
.def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8)
.def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size)
.def_readwrite("trt_max_workspace_size",
&RuntimeOption::trt_max_workspace_size)
.def_readwrite("is_dynamic", &RuntimeOption::is_dynamic)
.def_readwrite("long_to_int", &RuntimeOption::long_to_int)
.def_readwrite("use_nvidia_tf32", &RuntimeOption::use_nvidia_tf32)
.def_readwrite("unconst_ops_thres", &RuntimeOption::unconst_ops_thres)
.def_readwrite("poros_file", &RuntimeOption::poros_file);
pybind11::class_<TensorInfo>(m, "TensorInfo")
.def_readwrite("name", &TensorInfo::name)
@@ -72,6 +78,30 @@ void BindRuntime(pybind11::module& m) {
pybind11::class_<Runtime>(m, "Runtime")
.def(pybind11::init())
.def("init", &Runtime::Init)
.def("compile",
[](Runtime& self,
std::vector<std::vector<pybind11::array>>& warm_datas,
const RuntimeOption& _option) {
size_t rows = warm_datas.size();
size_t columns = warm_datas[0].size();
std::vector<std::vector<FDTensor>> warm_tensors(
rows, std::vector<FDTensor>(columns));
for (size_t i = 0; i < rows; ++i) {
for (size_t j = 0; j < columns; ++j) {
auto dtype =
NumpyDataTypeToFDDataType(warm_datas[i][j].dtype());
std::vector<int64_t> data_shape;
data_shape.insert(
data_shape.begin(), warm_datas[i][j].shape(),
warm_datas[i][j].shape() + warm_datas[i][j].ndim());
warm_tensors[i][j].Resize(data_shape, dtype);
memcpy(warm_tensors[i][j].MutableData(),
warm_datas[i][j].mutable_data(),
warm_datas[i][j].nbytes());
}
}
return self.Compile(warm_tensors, _option);
})
.def("infer", .def("infer",
[](Runtime& self, std::vector<FDTensor>& inputs) { [](Runtime& self, std::vector<FDTensor>& inputs) {
std::vector<FDTensor> outputs(self.NumOutputs()); std::vector<FDTensor> outputs(self.NumOutputs());
@@ -121,11 +151,13 @@ void BindRuntime(pybind11::module& m) {
.value("UNKOWN", Backend::UNKNOWN) .value("UNKOWN", Backend::UNKNOWN)
.value("ORT", Backend::ORT) .value("ORT", Backend::ORT)
.value("TRT", Backend::TRT) .value("TRT", Backend::TRT)
.value("POROS", Backend::POROS)
.value("PDINFER", Backend::PDINFER) .value("PDINFER", Backend::PDINFER)
.value("LITE", Backend::LITE); .value("LITE", Backend::LITE);
pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(), pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(),
"ModelFormat for inference.") "ModelFormat for inference.")
.value("PADDLE", ModelFormat::PADDLE) .value("PADDLE", ModelFormat::PADDLE)
.value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT)
.value("ONNX", ModelFormat::ONNX); .value("ONNX", ModelFormat::ONNX);
pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(), pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
"Device for inference.") "Device for inference.")


@@ -29,6 +29,10 @@
#include "fastdeploy/backends/paddle/paddle_backend.h"
#endif
#ifdef ENABLE_POROS_BACKEND
#include "fastdeploy/backends/poros/poros_backend.h"
#endif
#ifdef ENABLE_OPENVINO_BACKEND
#include "fastdeploy/backends/openvino/ov_backend.h"
#endif
@@ -50,6 +54,9 @@ std::vector<Backend> GetAvailableBackends() {
#ifdef ENABLE_PADDLE_BACKEND
backends.push_back(Backend::PDINFER);
#endif
#ifdef ENABLE_POROS_BACKEND
backends.push_back(Backend::POROS);
#endif
#ifdef ENABLE_OPENVINO_BACKEND
backends.push_back(Backend::OPENVINO);
#endif
@@ -76,6 +83,8 @@ std::string Str(const Backend& b) {
return "Backend::TRT";
} else if (b == Backend::PDINFER) {
return "Backend::PDINFER";
} else if (b == Backend::POROS) {
return "Backend::POROS";
} else if (b == Backend::OPENVINO) {
return "Backend::OPENVINO";
} else if (b == Backend::LITE) {
@@ -89,6 +98,8 @@ std::string Str(const ModelFormat& f) {
return "ModelFormat::PADDLE";
} else if (f == ModelFormat::ONNX) {
return "ModelFormat::ONNX";
} else if (f == ModelFormat::TORCHSCRIPT) {
return "ModelFormat::TORCHSCRIPT";
}
return "UNKNOWN-ModelFormat";
}
@@ -102,6 +113,8 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) {
out << "Backend::PDINFER";
} else if (backend == Backend::OPENVINO) {
out << "Backend::OPENVINO";
} else if (backend == Backend::POROS) {
out << "Backend::POROS";
} else if (backend == Backend::LITE) {
out << "Backend::LITE";
}
@@ -114,6 +127,8 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
out << "ModelFormat::PADDLE";
} else if (format == ModelFormat::ONNX) {
out << "ModelFormat::ONNX";
} else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT";
}
out << "UNKNOWN-ModelFormat";
return out;
@@ -137,9 +152,17 @@ bool CheckModelFormat(const std::string& model_file,
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::TORCHSCRIPT) {
if (model_file.size() < 3 ||
model_file.substr(model_file.size() - 3, 3) != ".pt") {
FDERROR << "With model format of ModelFormat::TORCHSCRIPT, the model file "
"should ends with `.pt`, but now it's "
<< model_file << std::endl;
return false;
}
} else {
FDERROR << "Only support model format with frontend ModelFormat::PADDLE / "
"ModelFormat::ONNX / ModelFormat::TORCHSCRIPT."
<< std::endl;
return false;
}
@@ -155,6 +178,10 @@ ModelFormat GuessModelFormat(const std::string& model_file) {
model_file.substr(model_file.size() - 5, 5) == ".onnx") { model_file.substr(model_file.size() - 5, 5) == ".onnx") {
FDINFO << "Model Format: ONNX." << std::endl; FDINFO << "Model Format: ONNX." << std::endl;
return ModelFormat::ONNX; return ModelFormat::ONNX;
} else if (model_file.size() > 3 &&
model_file.substr(model_file.size() - 3, 3) == ".pt") {
FDINFO << "Model Format: Torchscript." << std::endl;
return ModelFormat::TORCHSCRIPT;
} }
FDERROR << "Cannot guess which model format you are using, please set " FDERROR << "Cannot guess which model format you are using, please set "
@@ -173,10 +200,13 @@ void RuntimeOption::SetModelPath(const std::string& model_path,
} else if (format == ModelFormat::ONNX) { } else if (format == ModelFormat::ONNX) {
model_file = model_path; model_file = model_path;
model_format = ModelFormat::ONNX; model_format = ModelFormat::ONNX;
} else if (format == ModelFormat::TORCHSCRIPT) {
model_file = model_path;
model_format = ModelFormat::TORCHSCRIPT;
} else { } else {
FDASSERT( FDASSERT(
false, false,
"The model format only can be ModelFormat::PADDLE/ModelFormat::ONNX."); "The model format only can be ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
} }
} }
@@ -223,6 +253,15 @@ void RuntimeOption::UseOrtBackend() {
#endif
}
// use poros backend
void RuntimeOption::UsePorosBackend() {
#ifdef ENABLE_POROS_BACKEND
backend = Backend::POROS;
#else
FDASSERT(false, "The FastDeploy didn't compile with PorosBackend.");
#endif
}
void RuntimeOption::UseTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
backend = Backend::TRT;
@@ -324,6 +363,36 @@ void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
trt_serialize_file = cache_file_path;
}
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
option = _option;
auto poros_option = PorosBackendOption();
poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
poros_option.gpu_id = option.device_id;
poros_option.long_to_int = option.long_to_int;
poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
poros_option.unconst_ops_thres = option.unconst_ops_thres;
poros_option.poros_file = option.poros_file;
poros_option.is_dynamic = option.is_dynamic;
poros_option.enable_fp16 = option.trt_enable_fp16;
poros_option.max_batch_size = option.trt_max_batch_size;
poros_option.max_workspace_size = option.trt_max_workspace_size;
FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
"PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
backend_ = utils::make_unique<PorosBackend>();
auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
FDASSERT(
casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
"Load model from Torchscript failed while initliazing PorosBackend.");
#else
FDASSERT(false,
"PorosBackend is not available, please compiled with "
"ENABLE_POROS_BACKEND=ON.");
#endif
return true;
}
 bool Runtime::Init(const RuntimeOption& _option) {
   option = _option;
   if (option.model_format == ModelFormat::AUTOREC) {
@@ -334,6 +403,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
     option.backend = Backend::ORT;
   } else if (IsBackendAvailable(Backend::PDINFER)) {
     option.backend = Backend::PDINFER;
+  } else if (IsBackendAvailable(Backend::POROS)) {
+    option.backend = Backend::POROS;
   } else if (IsBackendAvailable(Backend::OPENVINO)) {
     option.backend = Backend::OPENVINO;
   } else {
@@ -365,6 +436,15 @@ bool Runtime::Init(const RuntimeOption& _option) {
     CreatePaddleBackend();
     FDINFO << "Runtime initialized with Backend::PDINFER in "
            << Str(option.device) << "." << std::endl;
+  } else if (option.backend == Backend::POROS) {
+    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
+             "Backend::POROS only supports Device::CPU/Device::GPU.");
+    FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
+             "Backend::POROS only supports model format of ModelFormat::TORCHSCRIPT.");
+    FDINFO << "Runtime initialized with Backend::POROS in "
+           << Str(option.device) << "." << std::endl;
+    return true;
   } else if (option.backend == Backend::OPENVINO) {
     FDASSERT(option.device == Device::CPU,
              "Backend::OPENVINO only supports Device::CPU");
@@ -379,7 +459,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
            << "." << std::endl;
   } else {
     FDERROR << "Runtime only support "
-               "Backend::ORT/Backend::TRT/Backend::PDINFER as backend now."
+               "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
+               "backend now."
             << std::endl;
     return false;
   }


@@ -38,6 +38,7 @@ enum Backend {
   ORT,  ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
   TRT,  ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
   PDINFER,  ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
+  POROS,  ///< Poros, support TorchScript format model, CPU / Nvidia GPU
   OPENVINO,  ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
   LITE,  ///< Paddle Lite, support Paddle format model, ARM CPU only
 };
@@ -47,6 +48,7 @@ enum ModelFormat {
   AUTOREC,  ///< Auto recognize the model format by model file name
   PADDLE,  ///< Model with paddlepaddle format
   ONNX,  ///< Model with ONNX format
+  TORCHSCRIPT,  ///< Model with TorchScript format
 };
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
@@ -117,6 +119,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
   /// Set TensorRT as inference backend, only support GPU
   void UseTrtBackend();

+  /// Set Poros as inference backend, support CPU/GPU
+  void UsePorosBackend();
+
   /// Set OpenVINO as inference backend, only support CPU
   void UseOpenVINOBackend();
@@ -243,6 +248,13 @@ struct FASTDEPLOY_DECL RuntimeOption {
   size_t trt_max_batch_size = 32;
   size_t trt_max_workspace_size = 1 << 30;

+  // ====== Only for Poros Backend ======
+  bool is_dynamic = false;
+  bool long_to_int = true;
+  bool use_nvidia_tf32 = false;
+  int unconst_ops_thres = -1;
+  std::string poros_file = "";
+
   std::string model_file = "";   // Path of model file
   std::string params_file = "";  // Path of parameters file, can be empty
   ModelFormat model_format = ModelFormat::AUTOREC;  // format of input model
@@ -270,6 +282,15 @@ struct FASTDEPLOY_DECL Runtime {
   bool Infer(std::vector<FDTensor>& input_tensors,
              std::vector<FDTensor>* output_tensors);

+  /** \brief Compile a TorchScript module, only for Poros backend
+   *
+   * \param[in] prewarm_tensors Prewarm data for compilation
+   * \param[in] _option Runtime option
+   * \return true if compilation succeeded, otherwise false
+   */
+  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
+               const RuntimeOption& _option);
+
   /** \brief Get number of inputs
    */
   int NumInputs() { return backend_->NumInputs(); }

python/fastdeploy/c_lib_wrap.py.in Normal file → Executable file

@@ -34,6 +34,11 @@ def is_built_with_trt() -> bool:
 def is_built_with_paddle() -> bool:
     return True if "@ENABLE_PADDLE_BACKEND@" == "ON" else False


+def is_built_with_poros() -> bool:
+    return True if "@ENABLE_POROS_BACKEND@" == "ON" else False
+
+
 def is_built_with_openvino() -> bool:
     return True if "@ENABLE_OPENVINO_BACKEND@" == "ON" else False
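After CMake expands the @...@ placeholder, this becomes a plain boolean. A usage sketch, assuming the wrapper module is importable as shown (runtime.py imports it the same way):

from fastdeploy import c_lib_wrap as C

if not C.is_built_with_poros():
    raise RuntimeError("This FastDeploy build was compiled without ENABLE_POROS_BACKEND=ON.")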


@@ -13,6 +13,7 @@
 # limitations under the License.
 from __future__ import absolute_import
 import logging
+import numpy as np
 from . import ModelFormat
 from . import c_lib_wrap as C
@@ -28,8 +29,24 @@ class Runtime:
         """
         self._runtime = C.Runtime()
+        self.runtime_option = runtime_option
         assert self._runtime.init(
-            runtime_option._option), "Initialize Runtime Failed!"
+            self.runtime_option._option), "Initialize Runtime Failed!"

+    def forward(self, *inputs):
+        """Inference with input data, only for Poros backend.
+
+        :param inputs: (numpy.ndarray) Positional input arrays, bound to input names "x0", "x1", ...
+        :return: list of numpy.ndarray
+        """
+        if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT:
+            raise Exception(
+                "The forward function is only used for Poros backend, please call the infer function instead."
+            )
+        inputs_dict = dict()
+        for i in range(len(inputs)):
+            inputs_dict["x" + str(i)] = inputs[i]
+        return self.infer(inputs_dict)
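So forward() only renames positional arrays to "x0", "x1", ... and delegates to infer(). A usage sketch, assuming a Poros runtime already compiled for a two-input TorchScript model:

import numpy as np

x0 = np.random.rand(1, 3, 224, 224).astype(np.float32)
x1 = np.random.rand(1, 10).astype(np.float32)
outputs = runtime.forward(x0, x1)  # same as runtime.infer({"x0": x0, "x1": x1})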
     def infer(self, data):
         """Inference with input data.
@@ -41,6 +58,27 @@ class Runtime:
             data, list), "The input data should be type of dict or list."
         return self._runtime.infer(data)

+    def compile(self, warm_datas):
+        """Compile the loaded TorchScript module with prewarm data, only for Poros backend.
+
+        :param warm_datas: (list of list of numpy.ndarray or torch.Tensor) The prewarm data batches
+        :return: compiled TorchScript model
+        """
+        if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT:
+            raise Exception(
+                "The compile function is only used for Poros backend, please call the infer function instead."
+            )
+        assert isinstance(warm_datas,
+                          list), "The prewarm data should be type of list."
+        for i in range(len(warm_datas)):
+            warm_data = warm_datas[i]
+            if isinstance(warm_data[0], np.ndarray):
+                warm_data = list(data for data in warm_data)
+            else:
+                # torch.Tensor inputs are converted to numpy before crossing into C++
+                warm_data = list(data.numpy() for data in warm_data)
+            warm_datas[i] = warm_data
+        return self._runtime.compile(warm_datas, self.runtime_option._option)
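Putting the pieces together, a hedged end-to-end sketch of the Poros flow (model path and shapes are placeholders; the min/opt/max group ordering is an assumption, as noted earlier):

import numpy as np
import fastdeploy as fd
from fastdeploy import ModelFormat

option = fd.RuntimeOption()
option.use_gpu(0)
option.use_poros_backend()
option.set_model_path("model_script.pt", model_format=ModelFormat.TORCHSCRIPT)
option.is_dynamic = True

runtime = fd.Runtime(option)
warm_datas = [
    [np.zeros((1, 3, 224, 224), dtype=np.float32)],   # min
    [np.zeros((8, 3, 224, 224), dtype=np.float32)],   # opt
    [np.zeros((32, 3, 224, 224), dtype=np.float32)],  # max
]
runtime.compile(warm_datas)  # builds the Poros engine from the prewarm batches
result = runtime.forward(np.zeros((4, 3, 224, 224), dtype=np.float32))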
     def num_inputs(self):
         """Get number of inputs of the loaded model.
         """
@@ -85,6 +123,65 @@ class RuntimeOption:
     def __init__(self):
         self._option = C.RuntimeOption()
+    @property
+    def is_dynamic(self):
+        """Only for Poros backend
+
+        :param value: (bool) Whether to enable dynamic shape, default False
+        """
+        return self._option.is_dynamic
+
+    @property
+    def unconst_ops_thres(self):
+        """Only for Poros backend
+
+        :param value: (int) Minimum number of subgraph OPs, default 10
+        """
+        return self._option.unconst_ops_thres
+
+    @property
+    def long_to_int(self):
+        """Only for Poros backend
+
+        :param value: (bool) Whether to convert long dtype to int dtype, default True
+        """
+        return self._option.long_to_int
+
+    @property
+    def use_nvidia_tf32(self):
+        """Only for Poros backend
+
+        :param value: (bool) Whether to use the TF32 compute mode available on NVIDIA Ampere GPUs, which can bring some performance improvement at slightly reduced precision, default False
+        """
+        return self._option.use_nvidia_tf32
+
+    @is_dynamic.setter
+    def is_dynamic(self, value):
+        assert isinstance(
+            value, bool), "The value to set `is_dynamic` must be type of bool."
+        self._option.is_dynamic = value
+
+    @unconst_ops_thres.setter
+    def unconst_ops_thres(self, value):
+        assert isinstance(
+            value,
+            int), "The value to set `unconst_ops_thres` must be type of int."
+        self._option.unconst_ops_thres = value
+
+    @long_to_int.setter
+    def long_to_int(self, value):
+        assert isinstance(
+            value,
+            bool), "The value to set `long_to_int` must be type of bool."
+        self._option.long_to_int = value
+
+    @use_nvidia_tf32.setter
+    def use_nvidia_tf32(self, value):
+        assert isinstance(
+            value,
+            bool), "The value to set `use_nvidia_tf32` must be type of bool."
+        self._option.use_nvidia_tf32 = value
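These accessors make the Poros knobs scriptable with type checks; for example:

option = fd.RuntimeOption()
option.use_poros_backend()
option.is_dynamic = True         # compile with dynamic shapes
option.long_to_int = True        # cast int64 inputs down to int32
option.use_nvidia_tf32 = False   # keep strict fp32 on Ampere GPUs
option.unconst_ops_thres = 10    # minimum op count for a fused subgraph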
     def set_model_path(self,
                        model_path,
                        params_path="",
@@ -125,6 +222,11 @@ class RuntimeOption:
         """
         return self._option.use_paddle_backend()

+    def use_poros_backend(self):
+        """Use Poros backend, support inference of TorchScript models on CPU/Nvidia GPU.
+        """
+        return self._option.use_poros_backend()
+
     def use_ort_backend(self):
         """Use ONNX Runtime backend, support inference of Paddle/ONNX models on CPU/Nvidia GPU.
         """
@@ -235,7 +337,8 @@ class RuntimeOption:
                 continue
             if hasattr(getattr(self._option, attr), "__call__"):
                 continue
-            message += "  {} : {}\t\n".format(attr, getattr(self._option, attr))
+            message += "  {} : {}\t\n".format(attr,
+                                              getattr(self._option, attr))
         message.strip("\n")
         message += ")"
         return message

python/setup.py Normal file → Executable file

@@ -53,13 +53,16 @@ setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND",
                                                      "OFF")
 setup_configs["ENABLE_PADDLE_BACKEND"] = os.getenv("ENABLE_PADDLE_BACKEND",
                                                    "OFF")
+setup_configs["ENABLE_POROS_BACKEND"] = os.getenv("ENABLE_POROS_BACKEND",
+                                                  "OFF")
 setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "OFF")
 setup_configs["ENABLE_TEXT"] = os.getenv("ENABLE_TEXT", "OFF")
 setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF")
 setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF")
 setup_configs["BUILD_ON_JETSON"] = os.getenv("BUILD_ON_JETSON", "OFF")
 setup_configs["TRT_DIRECTORY"] = os.getenv("TRT_DIRECTORY", "UNDEFINED")
-setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY", "/usr/local/cuda")
+setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY",
+                                            "/usr/local/cuda")
 setup_configs["LIBRARY_NAME"] = PACKAGE_NAME
 setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main"
 setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "")
@@ -89,7 +92,8 @@ extras_require = {}
 # Default value is set to TRUE\1 to keep the settings same as the current ones.
 # However going forward the recommended way is to set this to False\0
-USE_MSVC_STATIC_RUNTIME = bool(os.getenv('USE_MSVC_STATIC_RUNTIME', '1') == '1')
+USE_MSVC_STATIC_RUNTIME = bool(
+    os.getenv('USE_MSVC_STATIC_RUNTIME', '1') == '1')
 ONNX_NAMESPACE = os.getenv('ONNX_NAMESPACE', 'paddle2onnx')

 ################################################################################
 # Version
@@ -119,7 +123,8 @@ assert CMAKE, 'Could not find "cmake" executable!'
 @contextmanager
 def cd(path):
     if not os.path.isabs(path):
-        raise RuntimeError('Can only cd to absolute path, got: {}'.format(path))
+        raise RuntimeError('Can only cd to absolute path, got: {}'.format(
+            path))
     orig_path = os.getcwd()
     os.chdir(path)
     try: