diff --git a/CMakeLists.txt b/CMakeLists.txt
old mode 100644
new mode 100755
index ca278fd0e..418189e58
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -51,10 +51,11 @@ endif()
 ############################# Basic Options for FastDeploy ################################
 option(ENABLE_PADDLE_FRONTEND "Whether to enable PaddlePaddle frontend to support load paddle model in fastdeploy." ON)
-option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu" OFF)
+option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-inference-gpu/poros-gpu" OFF)
 option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF)
 option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF)
 option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF)
+option(ENABLE_POROS_BACKEND "Whether to enable poros backend." OFF)
 option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF)
 option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
 option(ENABLE_VISION "Whether to enable vision models usage." OFF)
@@ -108,6 +109,9 @@ if(WIN32)
   if(ENABLE_PADDLE_BACKEND)
     message(FATAL_ERROR "-DENABLE_PADDLE_BACKEND=ON doesn't support on non 64-bit system now.")
   endif()
+  if(ENABLE_POROS_BACKEND)
+    message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.")
+  endif()
   if(ENABLE_VISION)
     message(FATAL_ERROR "-DENABLE_VISION=ON doesn't support on non 64-bit system now.")
   endif()
@@ -165,13 +169,14 @@ file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastde
 file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc)
 file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc)
 file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/paddle/*.cc)
+file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
 file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
 file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
 file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
 file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
 file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc)
 file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc)
-list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS})
+list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS})
 set(DEPEND_LIBS "")
@@ -228,6 +233,62 @@ if(ENABLE_OPENVINO_BACKEND)
   include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake)
 endif()
+if(ENABLE_POROS_BACKEND)
+  set(CMAKE_CXX_STANDARD 14)
+  add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+  add_definitions(-DENABLE_POROS_BACKEND)
+  list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_POROS_SRCS})
+  include(${PROJECT_SOURCE_DIR}/cmake/poros.cmake)
+  list(APPEND DEPEND_LIBS external_poros)
+  set(PYTHON_MINIMUM_VERSION 3.6)
+  set(PYTORCH_MINIMUM_VERSION 1.9)
+  set(TENSORRT_MINIMUM_VERSION 8.0)
+  # find python3
+  find_package(Python3 ${PYTHON_MINIMUM_VERSION} REQUIRED COMPONENTS Interpreter Development)
+  message(STATUS "Found Python: ${Python3_VERSION_MAJOR}.${Python3_VERSION_MINOR}.${Python3_VERSION_PATCH}")
+
+  if(NOT Python3_SITELIB)
+    message(FATAL_ERROR "site-packages not found.")
+  else()
+    message(STATUS "site-packages: ${Python3_SITELIB}")
+  endif()
+  # find pytorch
+  find_package(Torch ${PYTORCH_MINIMUM_VERSION} REQUIRED HINTS ${Python3_SITELIB})
+  include_directories(${TORCH_INCLUDE_DIRS})
+  include_directories(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/common)
+  list(APPEND DEPEND_LIBS ${TORCH_LIBRARY})
+  if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch")
+    file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch")
+  endif()
+  if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib")
+    file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib")
+  endif()
+  find_package(Python COMPONENTS Interpreter Development REQUIRED)
+  message(STATUS "Copying ${TORCH_INSTALL_PREFIX}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib ...")
+  execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TORCH_INSTALL_PREFIX}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/torch/lib)
+  # find trt
+  if(NOT WITH_GPU)
+    message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must set -DWITH_GPU=ON, but now it's OFF")
+  endif()
+  if(NOT TRT_DIRECTORY)
+    message(FATAL_ERROR "While -DENABLE_POROS_BACKEND=ON, must define -DTRT_DIRECTORY, e.g. -DTRT_DIRECTORY=/Downloads/TensorRT-8.4")
+  endif()
+  include_directories(${TRT_DIRECTORY}/include)
+  find_library(TRT_INFER_LIB nvinfer ${TRT_DIRECTORY}/lib)
+  find_library(TRT_ONNX_LIB nvonnxparser ${TRT_DIRECTORY}/lib)
+  find_library(TRT_PLUGIN_LIB nvinfer_plugin ${TRT_DIRECTORY}/lib)
+  list(APPEND DEPEND_LIBS ${TRT_INFER_LIB} ${TRT_ONNX_LIB} ${TRT_PLUGIN_LIB})
+  if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")
+    file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt")
+  endif()
+  if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
+    file(REMOVE_RECURSE "${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib")
+  endif()
+  find_package(Python COMPONENTS Interpreter Development REQUIRED)
+  message(STATUS "Copying ${TRT_DIRECTORY}/lib to ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib ...")
+  execute_process(COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/scripts/copy_directory.py ${TRT_DIRECTORY}/lib ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/tensorrt/lib)
+endif()
+
 if(WITH_GPU)
   if(APPLE)
     message(FATAL_ERROR "Cannot enable GPU while compling in Mac OSX.")
@@ -292,7 +353,7 @@ if(ENABLE_TRT_BACKEND)
                     OUTPUT_VARIABLE curr_out
                     ERROR_VARIABLE curr_out)
     if(ret EQUAL "1")
-      message(FATAL_ERROR "Failed to patchelf tensorrt libraries.")
+      message(FATAL_ERROR "Failed to patchelf tensorrt libraries.")
     endif()
     message(STATUS "result:${result} out:${curr_out}")
   endif()
diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
old mode 100644
new mode 100755
index a76f9a8c3..bb7cd6cff
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -5,6 +5,8 @@ set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
 set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@)
 set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
 set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
+set(ENABLE_POROS_BACKEND @ENABLE_POROS_BACKEND@)
+set(POROS_VERSION @POROS_VERSION@)
 set(ENABLE_TRT_BACKEND @ENABLE_TRT_BACKEND@)
 set(ENABLE_PADDLE_FRONTEND @ENABLE_PADDLE_FRONTEND@)
 set(ENABLE_VISION @ENABLE_VISION@)
@@ -106,6 +108,12 @@ if(ENABLE_LITE_BACKEND)
   endif()
 endif()
+if(ENABLE_POROS_BACKEND)
+  find_library(POROS_LIB poros ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/lib NO_DEFAULT_PATH)
+  list(APPEND FASTDEPLOY_LIBS ${POROS_LIB})
+  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/poros/include)
+endif()
+
 if(WITH_GPU)
   if (NOT CUDA_DIRECTORY)
     set(CUDA_DIRECTORY "/usr/local/cuda")
@@ -212,12 +220,16 @@ message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
 message(STATUS " WITH_GPU : ${WITH_GPU}")
 message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
 message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
+message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
 message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
 message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
 message(STATUS " ENABLE_LITE_BACKEND : ${ENABLE_LITE_BACKEND}")
 if(ENABLE_PADDLE_BACKEND)
   message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}")
 endif()
+if(ENABLE_POROS_BACKEND)
+  message(STATUS " Poros version : ${POROS_VERSION}")
+endif()
 if(ENABLE_OPENVINO_BACKEND)
   message(STATUS " OpenVINO version : ${OPENVINO_VERSION}")
 endif()
diff --git a/cmake/poros.cmake b/cmake/poros.cmake
new file mode 100755
index 000000000..a457f9181
--- /dev/null
+++ b/cmake/poros.cmake
@@ -0,0 +1,76 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+include(ExternalProject)
+
+set(POROS_PROJECT "extern_poros")
+set(POROS_PREFIX_DIR ${THIRD_PARTY_PATH}/poros)
+set(POROS_SOURCE_DIR ${THIRD_PARTY_PATH}/poros/src/${POROS_PROJECT})
+set(POROS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/poros)
+set(POROS_INC_DIR
+    "${POROS_INSTALL_DIR}/include"
+    CACHE PATH "poros include directory." FORCE)
+set(POROS_LIB_DIR
+    "${POROS_INSTALL_DIR}/lib/"
+    CACHE PATH "poros lib directory." FORCE)
+set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${POROS_LIB_DIR}")
+
+include_directories(${POROS_INC_DIR})
+if(WIN32)
+  message(FATAL_ERROR "Poros Backend doesn't support Windows now.")
+elseif(APPLE)
+  message(FATAL_ERROR "Poros Backend doesn't support Mac OSX now.")
+else()
+  set(POROS_COMPILE_LIB
+      "${POROS_INSTALL_DIR}/lib/libporos.so"
+      CACHE FILEPATH "poros compile library." FORCE)
+endif(WIN32)
+
+set(POROS_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
+set(POROS_VERSION "0.1.0")
+if(WIN32)
+  message(FATAL_ERROR "Poros Backend doesn't support Windows now.")
+elseif(APPLE)
+  message(FATAL_ERROR "Poros Backend doesn't support Mac OSX now.")
+else()
+  if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
+    message(FATAL_ERROR "Poros Backend doesn't support linux aarch64 now.")
+    set(POROS_FILE "poros-linux-aarch64-${POROS_VERSION}.tgz")
+  else()
+    set(POROS_FILE "poros-linux-x64-${POROS_VERSION}.tgz")
+    if(WITH_GPU)
+      set(POROS_FILE "poros-linux-x64-gpu-${POROS_VERSION}.tgz")
+    endif()
+  endif()
+endif()
+set(POROS_URL "${POROS_URL_BASE}${POROS_FILE}")
+
+ExternalProject_Add(
+  ${POROS_PROJECT}
+  ${EXTERNAL_PROJECT_LOG_ARGS}
+  URL ${POROS_URL}
+  PREFIX ${POROS_PREFIX_DIR}
+  DOWNLOAD_NO_PROGRESS 1
+  CONFIGURE_COMMAND ""
+  BUILD_COMMAND ""
+  UPDATE_COMMAND ""
+  INSTALL_COMMAND
+    ${CMAKE_COMMAND} -E copy_directory ${POROS_SOURCE_DIR} ${POROS_INSTALL_DIR}
+  BUILD_BYPRODUCTS ${POROS_COMPILE_LIB})
+
+add_library(external_poros STATIC IMPORTED GLOBAL)
+set_property(TARGET external_poros PROPERTY IMPORTED_LOCATION ${POROS_COMPILE_LIB})
+add_dependencies(external_poros ${POROS_PROJECT})
diff --git a/cmake/summary.cmake b/cmake/summary.cmake
old mode 100644
new mode 100755
index e522a130a..2deea28a7
--- a/cmake/summary.cmake
+++ b/cmake/summary.cmake
@@ -32,6 +32,7 @@ function(fastdeploy_summary)
   message(STATUS " Paddle2ONNX version : ${PADDLE2ONNX_VERSION}")
   message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
   message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
+  message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
   message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
   message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
   if(ENABLE_ORT_BACKEND)
@@ -40,6 +41,9 @@ function(fastdeploy_summary)
   if(ENABLE_PADDLE_BACKEND)
     message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}")
   endif()
+  if(ENABLE_POROS_BACKEND)
+    message(STATUS " Poros version : ${POROS_VERSION}")
+  endif()
   if(ENABLE_OPENVINO_BACKEND)
     message(STATUS " OpenVINO version : ${OPENVINO_VERSION}")
   endif()
diff --git a/docs/api_docs/cpp/main_page.md b/docs/api_docs/cpp/main_page.md
old mode 100644
new mode 100755
index 2266d476a..116ce95b0
--- a/docs/api_docs/cpp/main_page.md
+++ b/docs/api_docs/cpp/main_page.md
@@ -11,6 +11,7 @@ Currently, FastDeploy supported backends listed as below,
 | ONNX Runtime | CPU/Nvidia GPU | Paddle/ONNX | Windows(x64)/Linux(x64/aarch64)/Mac(x86/arm64) |
 | TensorRT | Nvidia GPU | Paddle/ONNX | Windows(x64)/Linux(x64)/Jetson |
 | OpenVINO | CPU | Paddle/ONNX | Windows(x64)/Linux(x64)/Mac(x86) |
+| Poros | CPU/Nvidia GPU | TorchScript | Linux(x64) |

 ### Example code
 - [Python examples](./)
diff --git a/fastdeploy/backends/poros/common/compile.h b/fastdeploy/backends/poros/common/compile.h
new file mode 100755
index 000000000..c7cbc6756
--- /dev/null
+++ b/fastdeploy/backends/poros/common/compile.h
@@ -0,0 +1,167 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "torch/script.h"
+#include "iengine.h"
+#include "poros_module.h"
+
+namespace baidu {
+namespace mirana {
+namespace poros {
+
+/**
+ * @brief compile graph
+ *
+ * @param [in] module : original module
+ * @param [in] prewarm_datas : prewarm data
+ * @param [in] options : inference options
+ * @return PorosModule
+ * @retval !nullptr => succeed  nullptr => failed
+ **/
+std::unique_ptr<PorosModule> Compile(
+    const torch::jit::Module& module,
+    const std::vector<std::vector<c10::IValue>>& prewarm_datas,
+    const PorosOptions& options);
+
+class Compiler {
+ public:
+  typedef std::unordered_map<int64_t, IEngine*> engine_map_t;
+  typedef std::vector<std::vector<c10::IValue>> ivalue_vec_t;
+
+  Compiler() : _origin_module(NULL) {}
+  ~Compiler();
+
+  /**
+   * @brief initialize the Compiler
+   *
+   * @param [in] options : poros options
+   * @return int
+   * @retval 0 => succeed  <0 => failed
+   **/
+  int init(const PorosOptions& options);
+
+  /**
+   * @brief compile the whole graph
+   *
+   * @param [in] origin_module
+   * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue
+   * @param [out] optimized_module : optimized graph
+   * @return int
+   * @retval 0 => succeed  <0 => failed
+   **/
+  int compile(const torch::jit::Module& origin_module,
+              const ivalue_vec_t& prewarm_datas,
+              torch::jit::Module* optimized_module);
+
+ private:
+  /**
+   * @brief preprocess this calculation graph
+   *
+   * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue
+   * @param [out] graph : preprocessed graph
+   * @return int
+   * @retval 0 => succeed  <0 => failed
+   **/
+  int preprocess_graph(const ivalue_vec_t& prewarm_datas,
+                       std::shared_ptr<torch::jit::Graph>& graph);
+
+  /**
+   * @brief segment this calculation graph
+   *
+   * @param [in/out] graph
+   * @return int
+   * @retval 0 => succeed  <0 => failed
+   **/
+  int segment_graph(std::shared_ptr<torch::jit::Graph>& graph);
+
+  // Split a subgraph (block); the divided subgraph is associated with the block
+  int segment_block(torch::jit::Block& block, IEngine* engine,
+                    int current_depth);
+
+  /**
+   * @brief subgraph optimization
+   *
+   * @param [in] prewarm_datas : ivalue_vec_t, vector of IValue
+   * @param [in] opt_graph : graph to optimize
+   * @param [out] optimized_module : optimized graph
+   * @return int
+   * @retval 0 => succeed  <0 => failed
+   **/
+  int optimize_subgraph(const ivalue_vec_t& prewarm_datas,
+                        const std::shared_ptr<torch::jit::Graph>& opt_graph,
+                        torch::jit::Module* optimized_module);
+
+  // Subgraph optimization (block)
+  int optimize_subblock(torch::jit::Block* block,
+                        torch::jit::Module* optimized_module);
+
+  /**
+   * @brief compile the subgraph into a new graph based on the engine
+   *
+   * @param [in] engine : the engine used by the subgraph
+   * @param [in] subgraph_node : subgraph node
+   * @param [out] module : transformed module
+   * @return int
+   * @retval 0 => succeed  <0 => failed
+   **/
+  int transform(IEngine* engine, torch::jit::Node& subgraph_node,
+                torch::jit::Module& module);
+
+  /**
+   * @brief select an engine based on the subgraph and options
+   *
+   * @param [in] n : jit node
+   * @return IEngine*
+   * @retval !nullptr => succeed  nullptr => failed
+   **/
+  IEngine* select_engine(const torch::jit::Node* n);
+
+  /**
+   * @brief destroy
+   *
+   * @return void
+   **/
+  void close();
+
+ private:
+  int _max_segment_depth{5};                  // Maximum subgraph segmentation depth
+  ivalue_vec_t _prewarm_datas;                // Prewarm data
+  PorosOptions _options;
+  engine_map_t _engine_map;                   // Records the engine used by each subgraph
+  const torch::jit::Module* _origin_module;   // Original module
+  std::atomic<int> _engine_index = {0};       // Record engine index
+};
+
+/**
+ * @brief compile graph, for internal use
+ *
+ * @param [in] module : original module
+ * @param [in] prewarm_datas : prewarm data
+ * @param [in] options : inference options
+ * @return optimized module
+ * @retval !nullptr => succeed  nullptr => failed
+ **/
+std::unique_ptr<torch::jit::Module> CompileGraph(
+    const torch::jit::Module& module,
+    const std::vector<std::vector<c10::IValue>>& prewarm_datas,
+    const PorosOptions& options);
+
+}  // namespace poros
+}  // namespace mirana
+}  // namespace baidu
diff --git a/fastdeploy/backends/poros/common/iengine.h b/fastdeploy/backends/poros/common/iengine.h
new file mode 100755
index 000000000..5cb49e1ee
--- /dev/null
+++ b/fastdeploy/backends/poros/common/iengine.h
@@ -0,0 +1,84 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+
+// from pytorch
+#include "torch/script.h"
+#include "torch/csrc/jit/ir/ir.h"
+#include "ATen/core/interned_strings.h"
+
+#include "plugin_create.h"
+
+namespace baidu {
+namespace mirana {
+namespace poros {
+
+/**
+ * The base engine class.
+ * Every registered engine should inherit from this IEngine.
+ **/
+
+struct PorosGraph {
+  torch::jit::Graph* graph = NULL;
+  torch::jit::Node* node = NULL;
+};
+
+typedef uint64_t EngineID;
+
+class IEngine : public IPlugin, public torch::CustomClassHolder {
+ public:
+  virtual ~IEngine() {}
+
+  /**
+   * @brief init; initialization must be successful if init succeeds
+   * @return int
+   * @retval 0 => success, <0 => fail
+   **/
+  virtual int init() = 0;
+
+  /**
+   * @brief During compilation, the subgraph is converted into the graph
+   * structure of the corresponding engine and stored inside the engine,
+   * so that excute_engine can be called at runtime.
+   * @param [in] sub_graph : subgraph
+   * @return int
+   * @retval 0 => success, <0 => fail
+   **/
+  virtual int transform(const PorosGraph& sub_graph) = 0;
+
+  /**
+   * @brief subgraph execution logic
+   * @param [in] inputs : input tensors
+   * @return output tensors
+   **/
+  virtual std::vector<at::Tensor> excute_engine(
+      const std::vector<at::Tensor>& inputs) = 0;
+
+  virtual void register_module_attribute(const std::string& name,
+                                         torch::jit::Module& module) = 0;
+
+  // Engine identifier
+  virtual const std::string who_am_i() = 0;
+
+  // Whether the node is supported by the current engine
+  bool is_node_supported(const torch::jit::Node* node);
+
+ public:
+  std::pair<uint64_t, uint64_t> _num_io;  // Number of input/output parameters
+  EngineID _id;
+};
+
+}  // namespace poros
+}  // namespace mirana
+}  // namespace baidu
diff --git a/fastdeploy/backends/poros/common/plugin_create.h b/fastdeploy/backends/poros/common/plugin_create.h
new file mode 100755
index 000000000..d160f2440
--- /dev/null
+++ b/fastdeploy/backends/poros/common/plugin_create.h
@@ -0,0 +1,65 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+
+namespace baidu {
+namespace mirana {
+namespace poros {
+
+class IPlugin {
+ public:
+  virtual ~IPlugin() {}
+  virtual const std::string who_am_i() = 0;
+};
+
+typedef IPlugin* (*plugin_creator_t)();
+typedef std::unordered_map<std::string, plugin_creator_t> plugin_creator_map_t;
+
+IPlugin* create_plugin(const std::string& plugin_name);
+IPlugin* create_plugin(const std::string& plugin_name,
+                       const plugin_creator_map_t& plugin_creator_map);
+
+void create_all_plugins(const plugin_creator_map_t& plugin_creator_map,
+                        std::unordered_map<std::string, IPlugin*>& plugin_m);
+// void create_all_plugins(std::unordered_map<std::string, IPlugin*>& plugin_m);
+
+template <typename PluginType>
+IPlugin* default_plugin_creator() {
+  return new (std::nothrow) PluginType;
+}
+
+void register_plugin_creator(const std::string& plugin_name,
+                             plugin_creator_t creator);
+void register_plugin_creator(const std::string& plugin_name,
+                             plugin_creator_t creator,
+                             plugin_creator_map_t& plugin_creator_map);
+
+template <typename PluginType>
+void register_plugin_class(const std::string& plugin_name) {
+  return register_plugin_creator(plugin_name,
+                                 default_plugin_creator<PluginType>);
+}
+
+// This version is recommended
+template <typename PluginType>
+void register_plugin_class(const std::string& plugin_name,
+                           plugin_creator_map_t& plugin_creator_map) {
+  return register_plugin_creator(plugin_name,
+                                 default_plugin_creator<PluginType>,
+                                 plugin_creator_map);
+}
+
+}  // namespace poros
+}  // namespace mirana
+}  // namespace baidu
+
+/* vim: set ts=4 sw=4 sts=4 tw=100 */
diff --git a/fastdeploy/backends/poros/common/poros_module.h b/fastdeploy/backends/poros/common/poros_module.h
new file mode 100755
index 000000000..74ba485d4
--- /dev/null
+++ b/fastdeploy/backends/poros/common/poros_module.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
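+
+// Illustrative usage sketch of the API declared below (the model path is
+// hypothetical; Load() and PorosOptions come from this header, and
+// PorosModule inherits forward() from torch::jit::Module):
+//
+//   baidu::mirana::poros::PorosOptions opts;
+//   opts.device = baidu::mirana::poros::Device::GPU;  // or Device::CPU
+//   auto poros_module =
+//       baidu::mirana::poros::Load("compiled_model.poros", opts);
+//   std::vector<c10::IValue> inputs;  // prepared by the caller
+//   auto outputs = poros_module->forward(inputs);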
+
+#pragma once
+
+#include <string>
+#include "torch/script.h"
+#include "torch/csrc/jit/jit_log.h"
+// #include "ATen/Context.h"
+
+namespace baidu {
+namespace mirana {
+namespace poros {
+
+enum Device : int8_t {
+  GPU = 0,
+  CPU,
+  XPU,
+  UNKNOW
+};
+
+struct PorosOptions {
+  Device device = GPU;
+  bool debug = false;
+  bool use_fp16 = false;
+  bool is_dynamic = false;
+  bool long_to_int = true;
+  uint64_t max_workspace_size = 1ULL << 30;
+  int32_t device_id = -1;
+  int32_t unconst_ops_thres = -1;
+  bool use_nvidia_tf32 = false;
+};
+
+class PorosModule : public torch::jit::Module {
+ public:
+  PorosModule(torch::jit::Module module) : torch::jit::Module(module) {}
+  ~PorosModule() = default;
+
+  void to_device(Device device) {
+    _options.device = device;
+  }
+
+  // c10::IValue forward(std::vector<c10::IValue> inputs);
+  // void save(const std::string& filename);
+
+ public:
+  PorosOptions _options;
+};
+
+// via PorosModule::save
+std::unique_ptr<PorosModule> Load(const std::string& filename,
+                                  const PorosOptions& options);
+
+}  // namespace poros
+}  // namespace mirana
+}  // namespace baidu
diff --git a/fastdeploy/backends/poros/poros_backend.cc b/fastdeploy/backends/poros/poros_backend.cc
new file mode 100755
index 000000000..8fdc42fb3
--- /dev/null
+++ b/fastdeploy/backends/poros/poros_backend.cc
@@ -0,0 +1,240 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/backends/poros/poros_backend.h"
+#include <fstream>
+
+namespace fastdeploy {
+
+TensorInfo PorosBackend::GetInputInfo(int index) {
+  // eager mode can't obtain input information before inference
+  TensorInfo info_input;
+  return info_input;
+}
+
+TensorInfo PorosBackend::GetOutputInfo(int index) {
+  // eager mode can't obtain output information before inference
+  TensorInfo info_output;
+  return info_output;
+}
+
+std::vector<TensorInfo> PorosBackend::GetInputInfos() {
+  // eager mode can't obtain input information before inference
+  std::vector<TensorInfo> info_inputs;
+  return info_inputs;
+}
+
+std::vector<TensorInfo> PorosBackend::GetOutputInfos() {
+  // eager mode can't obtain output information before inference
+  std::vector<TensorInfo> info_outputs;
+  return info_outputs;
+}
+
+void PorosBackend::BuildOption(const PorosBackendOption& option) {
+  _options.device = option.use_gpu ? baidu::mirana::poros::Device::GPU
+                                   : baidu::mirana::poros::Device::CPU;
+  _options.long_to_int = option.long_to_int;
+  _options.use_nvidia_tf32 = option.use_nvidia_tf32;
+  _options.device_id = option.gpu_id;
+  _options.unconst_ops_thres = option.unconst_ops_thres;
+  _options.is_dynamic = option.is_dynamic;
+  _options.max_workspace_size = option.max_workspace_size;
+  _options.use_fp16 = option.enable_fp16;
+  return;
+}
+
+bool PorosBackend::Compile(const std::string& model_file,
+                           std::vector<std::vector<FDTensor>>& prewarm_tensors,
+                           const PorosBackendOption& option) {
+  if (initialized_) {
+    FDERROR << "PorosBackend is already initialized, cannot initialize again."
+            << std::endl;
+    return false;
+  }
+  BuildOption(option);
+  torch::jit::Module mod;
+  mod = torch::jit::load(model_file);
+  mod.eval();
+  if (option.use_gpu) {
+    mod.to(at::kCUDA);
+  } else {
+    mod.to(at::kCPU);
+  }
+  // get inputs_nums and outputs_nums
+  auto graph = mod.get_method("forward").graph();
+  auto inputs = graph->inputs();
+  // remove self node
+  _numinputs = inputs.size() - 1;
+  // FDTensor to at::Tensor
+  std::vector<std::vector<c10::IValue>> prewarm_datas;
+  bool is_backend_cuda = option.use_gpu ? true : false;
+  for (size_t i = 0; i < prewarm_tensors.size(); ++i) {
+    std::vector<c10::IValue> prewarm_data;
+    for (size_t j = 0; j < prewarm_tensors[i].size(); ++j) {
+      auto tensor = CreatePorosValue(prewarm_tensors[i][j], is_backend_cuda);
+      prewarm_data.push_back(tensor);
+    }
+    prewarm_datas.push_back(prewarm_data);
+  }
+  // get outputs nums
+  auto temp_result = mod.forward(prewarm_datas[0]);
+  size_t outputs_nums = 0;
+  if (temp_result.isTensor()) {
+    outputs_nums += 1;
+  } else if (temp_result.isTuple()) {
+    auto temp_result_tuple = temp_result.toTuple();
+    for (size_t i = 0; i < temp_result_tuple->elements().size(); ++i) {
+      auto poros_tensor = temp_result_tuple->elements()[i];
+      if (poros_tensor.isTensor()) {
+        outputs_nums += 1;
+      } else if (poros_tensor.isList()) {
+        auto poros_tensor_list = poros_tensor.toList();
+        outputs_nums += poros_tensor_list.size();
+      } else if (poros_tensor.isTuple()) {
+        auto poros_tensor_tuple = poros_tensor.toTuple();
+        outputs_nums += poros_tensor_tuple->elements().size();
+      } else {
+        continue;
+      }
+    }
+  }
+  _numoutputs = outputs_nums;
+  _poros_module = baidu::mirana::poros::Compile(mod, prewarm_datas, _options);
+  if (_poros_module == nullptr) {
+    FDERROR << "PorosBackend initialization failed, try to initialize again."
+            << std::endl;
+    return false;
+  }
+  initialized_ = true;
+  return true;
+}
+
+bool PorosBackend::InitFromTorchScript(const std::string& model_file,
+                                       const PorosBackendOption& option) {
+  if (initialized_) {
+    FDERROR << "PorosBackend is already initialized, cannot initialize again."
+            << std::endl;
+    return false;
+  }
+  if (option.poros_file != "") {
+    std::ifstream fin(option.poros_file, std::ios::binary | std::ios::in);
+    if (fin) {
+      FDINFO << "Detect compiled Poros file in " << option.poros_file
+             << ", will load it directly." << std::endl;
+      fin.close();
+      return InitFromPoros(option.poros_file, option);
+    }
+  }
+  BuildOption(option);
+  torch::jit::Module mod;
+  mod = torch::jit::load(model_file);
+  mod.eval();
+  if (option.use_gpu) {
+    mod.to(at::kCUDA);
+  } else {
+    mod.to(at::kCPU);
+  }
+  // get inputs_nums and outputs_nums
+  auto graph = mod.get_method("forward").graph();
+  auto inputs = graph->inputs();
+  // remove self node
+  _numinputs = inputs.size() - 1;
+  auto outputs = graph->outputs();
+  _numoutputs = outputs.size();
+  _poros_module = baidu::mirana::poros::Compile(mod, _prewarm_datas, _options);
+  if (_poros_module == nullptr) {
+    FDERROR << "PorosBackend initialization failed, try to initialize again."
+            << std::endl;
+    return false;
+  }
+  initialized_ = true;
+  return true;
+}
+
+bool PorosBackend::InitFromPoros(const std::string& model_file,
+                                 const PorosBackendOption& option) {
+  if (initialized_) {
+    FDERROR << "PorosBackend is already initialized, cannot initialize again."
+            << std::endl;
+    return false;
+  }
+  BuildOption(option);
+  _poros_module = baidu::mirana::poros::Load(model_file, _options);
+  if (_poros_module == nullptr) {
+    FDERROR << "PorosBackend initialization failed, try to initialize again."
+            << std::endl;
+    return false;
+  }
+  // get inputs_nums and outputs_nums
+  auto graph = _poros_module->get_method("forward").graph();
+  auto inputs = graph->inputs();
+  // remove self node
+  _numinputs = inputs.size() - 1;
+  auto outputs = graph->outputs();
+  _numoutputs = outputs.size();
+  initialized_ = true;
+  return true;
+}
+
+bool PorosBackend::Infer(std::vector<FDTensor>& inputs,
+                         std::vector<FDTensor>* outputs) {
+  // Convert FDTensor to PyTorch Tensor
+  std::vector<c10::IValue> poros_inputs;
+  bool is_backend_cuda =
+      _options.device == baidu::mirana::poros::Device::GPU ? true : false;
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    poros_inputs.push_back(CreatePorosValue(inputs[i], is_backend_cuda));
+  }
+  // Infer
+  auto poros_outputs = _poros_module->forward(poros_inputs);
+  // Convert PyTorch Tensor to FDTensor
+  if (poros_outputs.isTensor()) {
+    CopyTensorToCpu(poros_outputs.toTensor(), &((*outputs)[0]),
+                    is_backend_cuda);
+  } else if (poros_outputs.isTuple()) {
+    // deal with multiple outputs
+    auto poros_outputs_tuple = poros_outputs.toTuple();
+    size_t index = 0;
+    for (size_t i = 0; i < poros_outputs_tuple->elements().size(); ++i) {
+      auto poros_tensor = poros_outputs_tuple->elements()[i];
+      if (poros_tensor.isTensor()) {
+        CopyTensorToCpu(poros_tensor.toTensor(), &((*outputs)[index]),
+                        is_backend_cuda);
+        index += 1;
+      } else if (poros_tensor.isList()) {
+        auto poros_tensor_list = poros_tensor.toList();
+        for (const auto list_idx : c10::irange(0, poros_tensor_list.size())) {
+          const auto& elt = poros_tensor_list.get(list_idx);
+          CopyTensorToCpu(elt.toTensor(), &((*outputs)[index]),
+                          is_backend_cuda);
+          index += 1;
+        }
+      } else if (poros_tensor.isTuple()) {
+        auto poros_tensor_tuple = poros_tensor.toTuple();
+        for (size_t j = 0; j < poros_tensor_tuple->elements().size(); ++j) {
+          CopyTensorToCpu(poros_tensor_tuple->elements()[j].toTensor(),
+                          &((*outputs)[index]), is_backend_cuda);
+          index += 1;
+        }
+      } else {
+        continue;
+      }
+    }
+  } else {
+    FDERROR << "Failed to convert Poros output to FDTensor!" << std::endl;
+  }
+  return true;
+}
+
+}  // namespace fastdeploy
\ No newline at end of file
diff --git a/fastdeploy/backends/poros/poros_backend.h b/fastdeploy/backends/poros/poros_backend.h
new file mode 100755
index 000000000..656249e00
--- /dev/null
+++ b/fastdeploy/backends/poros/poros_backend.h
@@ -0,0 +1,107 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
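+
+// Illustrative usage sketch of the backend declared below ("model.pt" is a
+// hypothetical TorchScript file; the prewarm tensors drive Poros'
+// ahead-of-time graph optimization):
+//
+//   fastdeploy::PorosBackend backend;
+//   fastdeploy::PorosBackendOption option;
+//   std::vector<std::vector<fastdeploy::FDTensor>> prewarm;  // filled by caller
+//   backend.Compile("model.pt", prewarm, option);
+//   std::vector<fastdeploy::FDTensor> inputs;  // filled by caller
+//   std::vector<fastdeploy::FDTensor> outputs(backend.NumOutputs());
+//   backend.Infer(inputs, &outputs);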
+
+#pragma once
+
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "fastdeploy/backends/backend.h"
+
+#include "fastdeploy/backends/poros/common/compile.h"
+#include "fastdeploy/backends/poros/common/poros_module.h"
+
+namespace fastdeploy {
+
+struct PorosBackendOption {
+#ifdef WITH_GPU
+  bool use_gpu = true;
+#else
+  bool use_gpu = false;
+#endif
+  int gpu_id = 0;
+  bool long_to_int = true;
+  // TF32 compute mode on NVIDIA Ampere GPUs (e.g. A10) can bring some
+  // performance improvement, but may introduce small numerical differences.
+  bool use_nvidia_tf32 = false;
+  // Threshold for the number of non-const ops
+  int32_t unconst_ops_thres = -1;
+  std::string poros_file = "";
+  std::vector<FDDataType> prewarm_datatypes = {FDDataType::FP32};
+  // TRT options
+  bool enable_fp16 = false;
+  bool enable_int8 = false;
+  bool is_dynamic = false;
+  size_t max_batch_size = 32;
+  size_t max_workspace_size = 1 << 30;
+};
+
+// Convert data type from fastdeploy to poros
+at::ScalarType GetPorosDtype(const FDDataType& fd_dtype);
+
+// Convert data type from poros to fastdeploy
+FDDataType GetFdDtype(const at::ScalarType& dtype);
+
+// at::ScalarType to std::string for FDERROR
+std::string AtType2String(const at::ScalarType& dtype);
+
+// Create at::Tensor from FDTensor.
+// is_backend_cuda specifies whether Poros uses the GPU device;
+// while is_backend_cuda = true, tensor.device = Device::GPU.
+at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda = false);
+
+// Copy memory data from at::Tensor to fastdeploy::FDTensor
+void CopyTensorToCpu(const at::Tensor& tensor, FDTensor* fd_tensor,
+                     bool is_backend_cuda = false);
+
+class PorosBackend : public BaseBackend {
+ public:
+  PorosBackend() {}
+  virtual ~PorosBackend() = default;
+
+  void BuildOption(const PorosBackendOption& option);
+
+  bool InitFromTorchScript(
+      const std::string& model_file,
+      const PorosBackendOption& option = PorosBackendOption());
+
+  bool InitFromPoros(const std::string& model_file,
+                     const PorosBackendOption& option = PorosBackendOption());
+
+  bool Compile(const std::string& model_file,
+               std::vector<std::vector<FDTensor>>& prewarm_tensors,
+               const PorosBackendOption& option = PorosBackendOption());
+
+  bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
+
+  int NumInputs() const { return _numinputs; }
+
+  int NumOutputs() const { return _numoutputs; }
+
+  TensorInfo GetInputInfo(int index) override;
+  TensorInfo GetOutputInfo(int index) override;
+  std::vector<TensorInfo> GetInputInfos() override;
+  std::vector<TensorInfo> GetOutputInfos() override;
+
+ private:
+  baidu::mirana::poros::PorosOptions _options;
+  std::unique_ptr<baidu::mirana::poros::PorosModule> _poros_module;
+  std::vector<std::vector<c10::IValue>> _prewarm_datas;
+  int _numinputs = 1;
+  int _numoutputs = 1;
+};
+
+}  // namespace fastdeploy
diff --git a/fastdeploy/backends/poros/utils.cc b/fastdeploy/backends/poros/utils.cc
new file mode 100644
index 000000000..e7b749b58
--- /dev/null
+++ b/fastdeploy/backends/poros/utils.cc
@@ -0,0 +1,186 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
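+
+// Illustrative round trip through the two helpers defined below (CPU path;
+// the shape and dtype are arbitrary example values):
+//
+//   fastdeploy::FDTensor fd;
+//   fd.Resize({1, 3}, fastdeploy::FDDataType::FP32);
+//   at::Tensor t = fastdeploy::CreatePorosValue(fd, /*is_backend_cuda=*/false);
+//   fastdeploy::CopyTensorToCpu(t, &fd, /*is_backend_cuda=*/false);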
+ +#include "fastdeploy/backends/poros/poros_backend.h" + +#ifdef WITH_GPU +#include +#endif + +namespace fastdeploy { + +std::string AtType2String(const at::ScalarType& dtype) { + std::string out; + switch (dtype) { + case at::kByte: + out = "at::kByte"; + break; + case at::kChar: + out = "at::kChar"; + break; + case at::kShort: + out = "at::kShort"; + break; + case at::kInt: + out = "at::kInt"; + break; + case at::kLong: + out = "at::kLong"; + break; + case at::kHalf: + out = "at::kHalf"; + break; + case at::kFloat: + out = "at::kFloat"; + break; + case at::kDouble: + out = "at::kDouble"; + break; + default: + out = "at::UNKNOWN"; + } + return out; +} + +at::ScalarType GetPorosDtype(const FDDataType& fd_dtype) { + if (fd_dtype == FDDataType::FP32) { + return at::kFloat; + } else if (fd_dtype == FDDataType::FP64) { + return at::kDouble; + } else if (fd_dtype == FDDataType::INT32) { + return at::kInt; + } else if (fd_dtype == FDDataType::INT64) { + return at::kLong; + } + FDERROR << "Unrecognized fastdeply data type:" << Str(fd_dtype) << "." + << std::endl; + return at::kFloat; +} + +FDDataType GetFdDtype(const at::ScalarType& poros_dtype) { + if (poros_dtype == at::kFloat) { + return FDDataType::FP32; + } else if (poros_dtype == at::kDouble) { + return FDDataType::FP64; + } else if (poros_dtype == at::kInt) { + return FDDataType::INT32; + } else if (poros_dtype == at::kLong) { + return FDDataType::INT64; + } + FDERROR << "Unrecognized poros data type:" << AtType2String(poros_dtype) + << "." << std::endl; + return FDDataType::FP32; +} + +at::Tensor CreatePorosValue(FDTensor& tensor, bool is_backend_cuda) { + FDASSERT(tensor.device == Device::GPU || tensor.device == Device::CPU, + "Only support tensor which device is CPU or GPU for PorosBackend."); + auto data_type = GetPorosDtype(tensor.dtype); + size_t numel = tensor.Numel(); + at::Tensor poros_value; + if (is_backend_cuda) { + poros_value = std::move( + at::empty(tensor.shape, {at::kCUDA}).to(data_type).contiguous()); + } else { + poros_value = std::move( + at::empty(tensor.shape, {at::kCPU}).to(data_type).contiguous()); + } + if (data_type == at::kFloat) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(float), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(float)); + } + } else if (data_type == at::kInt) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int32_t), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int32_t)); + } + } else if (data_type == at::kLong) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int64_t), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(int64_t)); + } + } else if (data_type == at::kDouble) { + if (is_backend_cuda) { + cudaMemcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(double), cudaMemcpyHostToDevice); + } else { + memcpy(poros_value.data_ptr(), static_cast(tensor.Data()), + numel * sizeof(double)); + } + } else { + FDASSERT(false, + "Unrecognized data type while calling " + "PorosBackend::CreatePorosValue()."); + } + return poros_value; +} + +void CopyTensorToCpu(const at::Tensor& tensor, FDTensor* fd_tensor, + bool is_backend_cuda) { + const auto data_type = tensor.scalar_type(); + std::vector shape; + auto 
sizes = tensor.sizes(); + for (size_t i = 0; i < sizes.size(); i++) { + shape.push_back(sizes[i]); + } + auto fd_dtype = GetFdDtype(data_type); + fd_tensor->Resize(shape, fd_dtype); + size_t numel = tensor.numel(); + // at::Tensor -> FDTensor + if (data_type == at::kFloat) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(float)); + } + return; + } else if (data_type == at::kInt) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int32_t)); + } + return; + } else if (data_type == at::kLong) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(int64_t)); + } + return; + } else if (data_type == at::kDouble) { + if (is_backend_cuda) { + cudaMemcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double), + cudaMemcpyDeviceToHost); + } else { + memcpy(fd_tensor->Data(), tensor.data_ptr(), numel * sizeof(double)); + } + return; + } +} + +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/core/config.h.in b/fastdeploy/core/config.h.in old mode 100644 new mode 100755 index 9696a6e7f..8d6a07bb6 --- a/fastdeploy/core/config.h.in +++ b/fastdeploy/core/config.h.in @@ -29,6 +29,10 @@ #cmakedefine ENABLE_PADDLE_BACKEND #endif +#ifndef ENABLE_POROS_BACKEND +#cmakedefine ENABLE_POROS_BACKEND +#endif + #ifndef ENABLE_OPENVINO_BACKEND #cmakedefine ENABLE_OPENVINO_BACKEND #endif diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc index 092fc9ebb..c2e2df19e 100755 --- a/fastdeploy/pybind/runtime.cc +++ b/fastdeploy/pybind/runtime.cc @@ -24,6 +24,7 @@ void BindRuntime(pybind11::module& m) { .def("use_cpu", &RuntimeOption::UseCpu) .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) + .def("use_poros_backend", &RuntimeOption::UsePorosBackend) .def("use_ort_backend", &RuntimeOption::UseOrtBackend) .def("set_ort_graph_opt_level", &RuntimeOption::SetOrtGraphOptLevel) .def("use_trt_backend", &RuntimeOption::UseTrtBackend) @@ -62,7 +63,12 @@ void BindRuntime(pybind11::module& m) { .def_readwrite("trt_enable_int8", &RuntimeOption::trt_enable_int8) .def_readwrite("trt_max_batch_size", &RuntimeOption::trt_max_batch_size) .def_readwrite("trt_max_workspace_size", - &RuntimeOption::trt_max_workspace_size); + &RuntimeOption::trt_max_workspace_size) + .def_readwrite("is_dynamic", &RuntimeOption::is_dynamic) + .def_readwrite("long_to_int", &RuntimeOption::long_to_int) + .def_readwrite("use_nvidia_tf32", &RuntimeOption::use_nvidia_tf32) + .def_readwrite("unconst_ops_thres", &RuntimeOption::unconst_ops_thres) + .def_readwrite("poros_file", &RuntimeOption::poros_file); pybind11::class_(m, "TensorInfo") .def_readwrite("name", &TensorInfo::name) @@ -72,6 +78,30 @@ void BindRuntime(pybind11::module& m) { pybind11::class_(m, "Runtime") .def(pybind11::init()) .def("init", &Runtime::Init) + .def("compile", + [](Runtime& self, + std::vector>& warm_datas, + const RuntimeOption& _option) { + size_t rows = warm_datas.size(); + size_t columns = warm_datas[0].size(); + std::vector> warm_tensors( + rows, std::vector(columns)); + for (size_t i = 0; i < rows; ++i) { + for (size_t 
j = 0; j < columns; ++j) { + auto dtype = + NumpyDataTypeToFDDataType(warm_datas[i][j].dtype()); + std::vector data_shape; + data_shape.insert( + data_shape.begin(), warm_datas[i][j].shape(), + warm_datas[i][j].shape() + warm_datas[i][j].ndim()); + warm_tensors[i][j].Resize(data_shape, dtype); + memcpy(warm_tensors[i][j].MutableData(), + warm_datas[i][j].mutable_data(), + warm_datas[i][j].nbytes()); + } + } + return self.Compile(warm_tensors, _option); + }) .def("infer", [](Runtime& self, std::vector& inputs) { std::vector outputs(self.NumOutputs()); @@ -121,11 +151,13 @@ void BindRuntime(pybind11::module& m) { .value("UNKOWN", Backend::UNKNOWN) .value("ORT", Backend::ORT) .value("TRT", Backend::TRT) + .value("POROS", Backend::POROS) .value("PDINFER", Backend::PDINFER) .value("LITE", Backend::LITE); pybind11::enum_(m, "ModelFormat", pybind11::arithmetic(), "ModelFormat for inference.") .value("PADDLE", ModelFormat::PADDLE) + .value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT) .value("ONNX", ModelFormat::ONNX); pybind11::enum_(m, "Device", pybind11::arithmetic(), "Device for inference.") diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc index 8df9d6548..d381e9776 100755 --- a/fastdeploy/runtime.cc +++ b/fastdeploy/runtime.cc @@ -29,6 +29,10 @@ #include "fastdeploy/backends/paddle/paddle_backend.h" #endif +#ifdef ENABLE_POROS_BACKEND +#include "fastdeploy/backends/poros/poros_backend.h" +#endif + #ifdef ENABLE_OPENVINO_BACKEND #include "fastdeploy/backends/openvino/ov_backend.h" #endif @@ -50,6 +54,9 @@ std::vector GetAvailableBackends() { #ifdef ENABLE_PADDLE_BACKEND backends.push_back(Backend::PDINFER); #endif +#ifdef ENABLE_POROS_BACKEND + backends.push_back(Backend::POROS); +#endif #ifdef ENABLE_OPENVINO_BACKEND backends.push_back(Backend::OPENVINO); #endif @@ -76,6 +83,8 @@ std::string Str(const Backend& b) { return "Backend::TRT"; } else if (b == Backend::PDINFER) { return "Backend::PDINFER"; + } else if (b == Backend::POROS) { + return "Backend::POROS"; } else if (b == Backend::OPENVINO) { return "Backend::OPENVINO"; } else if (b == Backend::LITE) { @@ -89,6 +98,8 @@ std::string Str(const ModelFormat& f) { return "ModelFormat::PADDLE"; } else if (f == ModelFormat::ONNX) { return "ModelFormat::ONNX"; + } else if (f == ModelFormat::TORCHSCRIPT) { + return "ModelFormat::TORCHSCRIPT"; } return "UNKNOWN-ModelFormat"; } @@ -102,6 +113,8 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) { out << "Backend::PDINFER"; } else if (backend == Backend::OPENVINO) { out << "Backend::OPENVINO"; + } else if (backend == Backend::POROS) { + out << "Backend::POROS"; } else if (backend == Backend::LITE) { out << "Backend::LITE"; } @@ -114,6 +127,8 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) { out << "ModelFormat::PADDLE"; } else if (format == ModelFormat::ONNX) { out << "ModelFormat::ONNX"; + } else if (format == ModelFormat::TORCHSCRIPT) { + out << "ModelFormat::TORCHSCRIPT"; } out << "UNKNOWN-ModelFormat"; return out; @@ -137,9 +152,17 @@ bool CheckModelFormat(const std::string& model_file, << model_file << std::endl; return false; } + } else if (model_format == ModelFormat::TORCHSCRIPT) { + if (model_file.size() < 3 || + model_file.substr(model_file.size() - 3, 3) != ".pt") { + FDERROR << "With model format of ModelFormat::TORCHSCRIPT, the model file " + "should ends with `.pt`, but now it's " + << model_file << std::endl; + return false; + } } else { FDERROR << "Only support model format with frontend ModelFormat::PADDLE / " - "ModelFormat::ONNX." 
+ "ModelFormat::ONNX / ModelFormat::TORCHSCRIPT." << std::endl; return false; } @@ -155,6 +178,10 @@ ModelFormat GuessModelFormat(const std::string& model_file) { model_file.substr(model_file.size() - 5, 5) == ".onnx") { FDINFO << "Model Format: ONNX." << std::endl; return ModelFormat::ONNX; + } else if (model_file.size() > 3 && + model_file.substr(model_file.size() - 3, 3) == ".pt") { + FDINFO << "Model Format: Torchscript." << std::endl; + return ModelFormat::TORCHSCRIPT; } FDERROR << "Cannot guess which model format you are using, please set " @@ -173,10 +200,13 @@ void RuntimeOption::SetModelPath(const std::string& model_path, } else if (format == ModelFormat::ONNX) { model_file = model_path; model_format = ModelFormat::ONNX; + } else if (format == ModelFormat::TORCHSCRIPT) { + model_file = model_path; + model_format = ModelFormat::TORCHSCRIPT; } else { FDASSERT( false, - "The model format only can be ModelFormat::PADDLE/ModelFormat::ONNX."); + "The model format only can be ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT."); } } @@ -223,6 +253,15 @@ void RuntimeOption::UseOrtBackend() { #endif } +// use poros backend +void RuntimeOption::UsePorosBackend() { +#ifdef ENABLE_POROS_BACKEND + backend = Backend::POROS; +#else + FDASSERT(false, "The FastDeploy didn't compile with PorosBackend."); +#endif +} + void RuntimeOption::UseTrtBackend() { #ifdef ENABLE_TRT_BACKEND backend = Backend::TRT; @@ -324,6 +363,36 @@ void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) { trt_serialize_file = cache_file_path; } +bool Runtime::Compile(std::vector>& prewarm_tensors, + const RuntimeOption& _option) { +#ifdef ENABLE_POROS_BACKEND + option = _option; + auto poros_option = PorosBackendOption(); + poros_option.use_gpu = (option.device == Device::GPU) ? true : false; + poros_option.gpu_id = option.device_id; + poros_option.long_to_int = option.long_to_int; + poros_option.use_nvidia_tf32 = option.use_nvidia_tf32; + poros_option.unconst_ops_thres = option.unconst_ops_thres; + poros_option.poros_file = option.poros_file; + poros_option.is_dynamic = option.is_dynamic; + poros_option.enable_fp16 = option.trt_enable_fp16; + poros_option.max_batch_size = option.trt_max_batch_size; + poros_option.max_workspace_size = option.trt_max_workspace_size; + FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT, + "PorosBackend only support model format of ModelFormat::TORCHSCRIPT."); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + FDASSERT( + casted_backend->Compile(option.model_file, prewarm_tensors, poros_option), + "Load model from Torchscript failed while initliazing PorosBackend."); +#else + FDASSERT(false, + "PorosBackend is not available, please compiled with " + "ENABLE_POROS_BACKEND=ON."); +#endif + return true; +} + bool Runtime::Init(const RuntimeOption& _option) { option = _option; if (option.model_format == ModelFormat::AUTOREC) { @@ -334,6 +403,8 @@ bool Runtime::Init(const RuntimeOption& _option) { option.backend = Backend::ORT; } else if (IsBackendAvailable(Backend::PDINFER)) { option.backend = Backend::PDINFER; + } else if (IsBackendAvailable(Backend::POROS)) { + option.backend = Backend::POROS; } else if (IsBackendAvailable(Backend::OPENVINO)) { option.backend = Backend::OPENVINO; } else { @@ -365,6 +436,15 @@ bool Runtime::Init(const RuntimeOption& _option) { CreatePaddleBackend(); FDINFO << "Runtime initialized with Backend::PDINFER in " << Str(option.device) << "." 
<< std::endl; + } else if (option.backend == Backend::POROS) { + FDASSERT(option.device == Device::CPU || option.device == Device::GPU, + "Backend::POROS only supports Device::CPU/Device::GPU."); + FDASSERT( + option.model_format == ModelFormat::TORCHSCRIPT, + "Backend::POROS only supports model format of ModelFormat::TORCHSCRIPT."); + FDINFO << "Runtime initialized with Backend::POROS in " + << Str(option.device) << "." << std::endl; + return true; } else if (option.backend == Backend::OPENVINO) { FDASSERT(option.device == Device::CPU, "Backend::OPENVINO only supports Device::CPU"); @@ -379,7 +459,8 @@ bool Runtime::Init(const RuntimeOption& _option) { << "." << std::endl; } else { FDERROR << "Runtime only support " - "Backend::ORT/Backend::TRT/Backend::PDINFER as backend now." + "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as " + "backend now." << std::endl; return false; } diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h index 0cea2f026..80979218c 100755 --- a/fastdeploy/runtime.h +++ b/fastdeploy/runtime.h @@ -38,6 +38,7 @@ enum Backend { ORT, ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU + POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only }; @@ -47,6 +48,7 @@ enum ModelFormat { AUTOREC, ///< Auto recognize the model format by model file name PADDLE, ///< Model with paddlepaddle format ONNX, ///< Model with ONNX format + TORCHSCRIPT, ///< Model with TorchScript format }; FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, @@ -117,6 +119,9 @@ struct FASTDEPLOY_DECL RuntimeOption { /// Set TensorRT as inference backend, only support GPU void UseTrtBackend(); + /// Set Poros backend as inference backend, support CPU/GPU + void UsePorosBackend(); + /// Set OpenVINO as inference backend, only support CPU void UseOpenVINOBackend(); @@ -243,6 +248,13 @@ struct FASTDEPLOY_DECL RuntimeOption { size_t trt_max_batch_size = 32; size_t trt_max_workspace_size = 1 << 30; + // ======Only for Poros Backend======= + bool is_dynamic = false; + bool long_to_int = true; + bool use_nvidia_tf32 = false; + int unconst_ops_thres = -1; + std::string poros_file = ""; + std::string model_file = ""; // Path of model file std::string params_file = ""; // Path of parameters file, can be empty ModelFormat model_format = ModelFormat::AUTOREC; // format of input model @@ -270,6 +282,15 @@ struct FASTDEPLOY_DECL Runtime { bool Infer(std::vector& input_tensors, std::vector* output_tensors); + /** \brief Compile TorchScript Module, only for Poros backend + * + * \param[in] prewarm_tensors Prewarm datas for compile + * \param[in] _option Runtime option + * \return true if compile successed, otherwise false + */ + bool Compile(std::vector>& prewarm_tensors, + const RuntimeOption& _option); + /** \brief Get number of inputs */ int NumInputs() { return backend_->NumInputs(); } diff --git a/python/fastdeploy/c_lib_wrap.py.in b/python/fastdeploy/c_lib_wrap.py.in old mode 100644 new mode 100755 index 64a404eef..7736bd578 --- a/python/fastdeploy/c_lib_wrap.py.in +++ b/python/fastdeploy/c_lib_wrap.py.in @@ -34,6 +34,11 @@ def is_built_with_trt() -> bool: def is_built_with_paddle() -> bool: return True if "@ENABLE_PADDLE_BACKEND@" == "ON" else False + +def is_built_with_poros() 
->bool: + return True if "@ENABLE_POROS_BACKEND@" == "ON" else False + + def is_built_with_openvino() ->bool: return True if "@ENABLE_OPENVINO_BACKEND@" == "ON" else False diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py index aaba6abb3..c576369ee 100755 --- a/python/fastdeploy/runtime.py +++ b/python/fastdeploy/runtime.py @@ -13,6 +13,7 @@ # limitations under the License. from __future__ import absolute_import import logging +import numpy as np from . import ModelFormat from . import c_lib_wrap as C @@ -28,8 +29,24 @@ class Runtime: """ self._runtime = C.Runtime() + self.runtime_option = runtime_option assert self._runtime.init( - runtime_option._option), "Initialize Runtime Failed!" + self.runtime_option._option), "Initialize Runtime Failed!" + + def forward(self, *inputs): + """Inference with input data for poros + + :param data: (list[str : numpy.ndarray])The input data list + :return list of numpy.ndarray + """ + if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT: + raise Exception( + "The forward function is only used for Poros backend, please call infer function" + ) + inputs_dict = dict() + for i in range(len(inputs)): + inputs_dict["x" + str(i)] = inputs[i] + return self.infer(inputs_dict) def infer(self, data): """Inference with input data. @@ -41,6 +58,27 @@ class Runtime: data, list), "The input data should be type of dict or list." return self._runtime.infer(data) + def compile(self, warm_datas): + """compile with prewarm data for poros + + :param data: (list[str : numpy.ndarray])The prewarm data list + :return TorchScript Model + """ + if self.runtime_option._option.model_format != ModelFormat.TORCHSCRIPT: + raise Exception( + "The compile function is only used for Poros backend, please call infer function" + ) + assert isinstance(warm_datas, + list), "The prewarm data should be type of list." + for i in range(len(warm_datas)): + warm_data = warm_datas[i] + if isinstance(warm_data[0], np.ndarray): + warm_data = list(data for data in warm_data) + else: + warm_data = list(data.numpy() for data in warm_data) + warm_datas[i] = warm_data + return self._runtime.compile(warm_datas, self.runtime_option._option) + def num_inputs(self): """Get number of inputs of the loaded model. """ @@ -85,6 +123,65 @@ class RuntimeOption: def __init__(self): self._option = C.RuntimeOption() + @property + def is_dynamic(self): + """Only for Poros backend + + :param value: (bool)Whether to enable dynamic shape, default False + """ + return self._option.is_dynamic + + @property + def unconst_ops_thres(self): + """Only for Poros backend + + :param value: (int)Minimum number of subgraph OPs, default 10 + """ + return self._option.unconst_ops_thres + + @property + def long_to_int(self): + """Only for Poros backend + + :param value: (bool)Whether to convert long dtype to int dtype, default True + """ + return self._option.long_to_int + + @property + def use_nvidia_tf32(self): + """Only for Poros backend + + :param value: (bool)The calculation accuracy of tf32 mode exists on the A card, which can bring some performance improvements, default False + """ + return self._option.use_nvidia_tf32 + + @is_dynamic.setter + def is_dynamic(self, value): + assert isinstance( + value, bool), "The value to set `is_dynamic` must be type of bool." + self._option.is_dynamic = value + + @unconst_ops_thres.setter + def unconst_ops_thres(self, value): + assert isinstance( + value, + int), "The value to set `unconst_ops_thres` must be type of int." 
+ self._option.unconst_ops_thres = value + + @long_to_int.setter + def long_to_int(self, value): + assert isinstance( + value, + bool), "The value to set `long_to_int` must be type of bool." + self._option.long_to_int = value + + @use_nvidia_tf32.setter + def use_nvidia_tf32(self, value): + assert isinstance( + value, + bool), "The value to set `use_nvidia_tf32` must be type of bool." + self._option.use_nvidia_tf32 = value + def set_model_path(self, model_path, params_path="", @@ -125,6 +222,11 @@ class RuntimeOption: """ return self._option.use_paddle_backend() + def use_poros_backend(self): + """Use Poros backend, support inference TorchScript model on CPU/Nvidia GPU. + """ + return self._option.use_poros_backend() + def use_ort_backend(self): """Use ONNX Runtime backend, support inference Paddle/ONNX model on CPU/Nvidia GPU. """ @@ -235,7 +337,8 @@ class RuntimeOption: continue if hasattr(getattr(self._option, attr), "__call__"): continue - message += " {} : {}\t\n".format(attr, getattr(self._option, attr)) + message += " {} : {}\t\n".format(attr, + getattr(self._option, attr)) message.strip("\n") message += ")" return message diff --git a/python/setup.py b/python/setup.py old mode 100644 new mode 100755 index 1b0311df4..56a0bc534 --- a/python/setup.py +++ b/python/setup.py @@ -53,13 +53,16 @@ setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND", "OFF") setup_configs["ENABLE_PADDLE_BACKEND"] = os.getenv("ENABLE_PADDLE_BACKEND", "OFF") +setup_configs["ENABLE_POROS_BACKEND"] = os.getenv("ENABLE_POROS_BACKEND", + "OFF") setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "OFF") setup_configs["ENABLE_TEXT"] = os.getenv("ENABLE_TEXT", "OFF") setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF") setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF") setup_configs["BUILD_ON_JETSON"] = os.getenv("BUILD_ON_JETSON", "OFF") setup_configs["TRT_DIRECTORY"] = os.getenv("TRT_DIRECTORY", "UNDEFINED") -setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY", "/usr/local/cuda") +setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY", + "/usr/local/cuda") setup_configs["LIBRARY_NAME"] = PACKAGE_NAME setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main" setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "") @@ -89,7 +92,8 @@ extras_require = {} # Default value is set to TRUE\1 to keep the settings same as the current ones. # However going forward the recomemded way to is to set this to False\0 -USE_MSVC_STATIC_RUNTIME = bool(os.getenv('USE_MSVC_STATIC_RUNTIME', '1') == '1') +USE_MSVC_STATIC_RUNTIME = bool( + os.getenv('USE_MSVC_STATIC_RUNTIME', '1') == '1') ONNX_NAMESPACE = os.getenv('ONNX_NAMESPACE', 'paddle2onnx') ################################################################################ # Version @@ -119,7 +123,8 @@ assert CMAKE, 'Could not find "cmake" executable!' @contextmanager def cd(path): if not os.path.isabs(path): - raise RuntimeError('Can only cd to absolute path, got: {}'.format(path)) + raise RuntimeError('Can only cd to absolute path, got: {}'.format( + path)) orig_path = os.getcwd() os.chdir(path) try: