From d7a65e5c70e14ca1dc7831369fb0e2e39d869b46 Mon Sep 17 00:00:00 2001 From: Jason Date: Fri, 6 Jan 2023 13:44:05 +0800 Subject: [PATCH] [Other] Upgrade runtime module (#1068) * Upgrade runtime module * Update option.h * Fix build error * Move enumerates * little modification * little modification * little modification: * Remove some useless flags --- CMakeLists.txt | 17 +- FastDeploy.cmake.in | 1 - cmake/opencv.cmake | 6 - cmake/timvx.cmake | 5 - fastdeploy/backends/lite/option.h | 11 + fastdeploy/backends/openvino/option.h | 1 + fastdeploy/backends/ort/option.h | 1 + fastdeploy/backends/paddle/option.h | 1 + fastdeploy/backends/paddle/paddle_backend.cc | 2 + fastdeploy/backends/poros/option.h | 1 + .../rknpu2_config.h => rknpu2/option.h} | 0 .../{rknpu => }/rknpu2/rknpu2_backend.cc | 4 +- .../{rknpu => }/rknpu2/rknpu2_backend.h | 2 +- fastdeploy/backends/sophgo/option.h | 1 + fastdeploy/backends/tensorrt/option.h | 1 + fastdeploy/core/config.h.in | 4 - fastdeploy/core/fd_tensor.h | 2 +- fastdeploy/core/fd_type.cc | 155 +-- fastdeploy/core/fd_type.h | 19 - fastdeploy/fastdeploy_model.h | 4 +- fastdeploy/pybind/main.h | 2 +- fastdeploy/pybind/rknpu2_config_pybind.cc | 24 +- fastdeploy/runtime.cc | 1012 ----------------- fastdeploy/runtime.h | 572 +--------- fastdeploy/runtime/enum_variables.cc | 85 ++ fastdeploy/runtime/enum_variables.h | 79 ++ fastdeploy/runtime/runtime.cc | 492 ++++++++ fastdeploy/runtime/runtime.h | 109 ++ fastdeploy/runtime/runtime_option.cc | 515 +++++++++ fastdeploy/runtime/runtime_option.h | 482 ++++++++ .../detection/contrib/yolov7end2end_trt.cc | 6 +- 31 files changed, 1838 insertions(+), 1778 deletions(-) rename fastdeploy/backends/{rknpu/rknpu2/rknpu2_config.h => rknpu2/option.h} (100%) rename fastdeploy/backends/{rknpu => }/rknpu2/rknpu2_backend.cc (99%) rename fastdeploy/backends/{rknpu => }/rknpu2/rknpu2_backend.h (98%) mode change 100755 => 100644 fastdeploy/core/fd_type.cc delete mode 100755 fastdeploy/runtime.cc create mode 100644 fastdeploy/runtime/enum_variables.cc create mode 100644 fastdeploy/runtime/enum_variables.h create mode 100644 fastdeploy/runtime/runtime.cc create mode 100755 fastdeploy/runtime/runtime.h create mode 100644 fastdeploy/runtime/runtime_option.cc create mode 100644 fastdeploy/runtime/runtime_option.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 42bc600bb..eb5a18fdc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,15 +71,12 @@ option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF) option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF) option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF) option(WITH_TESTING "Whether to compile with unittest." OFF) + ############################# Options for Android cross compiling ######################### option(WITH_OPENCV_STATIC "Use OpenCV static lib for Android." OFF) option(WITH_LITE_STATIC "Use Paddle Lite static lib for Android." OFF) option(WITH_OPENMP "Use OpenMP support for Android." OFF) -# Please don't open this flag now, some bugs exists. -# Only support Linux Now -# option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF) - # Whether to build fastdeploy with vision/text/... examples, only for testings. 
option(BUILD_EXAMPLES "Whether to build fastdeploy with vision examples" OFF) @@ -187,7 +184,6 @@ add_definitions(-DFASTDEPLOY_LIB) configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h) configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc) file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc) -file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc) file(GLOB_RECURSE FDTENSOR_FUNC_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cu) file(GLOB_RECURSE DEPLOY_OP_CUDA_KERNEL_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/op_cuda_kernels/*.cu) file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc) @@ -195,7 +191,7 @@ file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fas file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc) file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp) file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc) -file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu/rknpu2/*.cc) +file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu2/*.cc) file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/sophgo/*.cc) file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc) file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc) @@ -420,15 +416,6 @@ endif() if(ENABLE_VISION) add_definitions(-DENABLE_VISION) add_definitions(-DENABLE_VISION_VISUALIZE) - if(ENABLE_OPENCV_CUDA) - if(NOT WITH_GPU) - message(FATAL_ERROR "ENABLE_OPENCV_CUDA is available on Linux and WITH_GPU=ON, but now WITH_GPU=OFF.") - endif() - if(APPLE OR ANDROID OR IOS OR WIN32) - message(FATAL_ERROR "Cannot enable opencv with cuda in mac/ios/android/windows os, please set -DENABLE_OPENCV_CUDA=OFF.") - endif() - add_definitions(-DENABLE_OPENCV_CUDA) - endif() add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp) list(APPEND DEPEND_LIBS yaml-cpp) if(BUILD_CUDA_SRC) diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index 6ba0b4307..a9f52d2e3 100755 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -20,7 +20,6 @@ set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@) set(OPENVINO_VERSION @OPENVINO_VERSION@) set(WITH_LITE_STATIC @WITH_LITE_STATIC@) set(WITH_OPENCV_STATIC @WITH_OPENCV_STATIC@) -# set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@) set(OPENCV_FILENAME @OPENCV_FILENAME@) set(OPENVINO_FILENAME @OPENVINO_FILENAME@) set(PADDLELITE_FILENAME @PADDLELITE_FILENAME@) diff --git a/cmake/opencv.cmake b/cmake/opencv.cmake index fd2ecabe4..5103a69d9 100755 --- a/cmake/opencv.cmake +++ b/cmake/opencv.cmake @@ -42,12 +42,6 @@ else() if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") set(OPENCV_FILENAME "opencv-linux-aarch64-3.4.14") endif() - if(ENABLE_OPENCV_CUDA) - if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") - message(FATAL_ERROR "Cannot set 
ENABLE_OPENCV_CUDA=ON while in linux-aarch64 platform.") - endif() - set(OPENCV_FILENAME "opencv-linux-x64-gpu-3.4.16") - endif() endif() if(NOT OPENCV_FILENAME) diff --git a/cmake/timvx.cmake b/cmake/timvx.cmake index 973face96..aae1fba1a 100755 --- a/cmake/timvx.cmake +++ b/cmake/timvx.cmake @@ -29,11 +29,6 @@ if(${WITH_GPU}) set(WITH_GPU OFF) endif() -if(${ENABLE_OPENCV_CUDA}) - message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_OPENCV_CUDA=OFF") - set(ENABLE_OPENCV_CUDA OFF) -endif() - if(${ENABLE_TEXT}) set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE) message(STATUS "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_TEXT=OFF") diff --git a/fastdeploy/backends/lite/option.h b/fastdeploy/backends/lite/option.h index 2a4ba7a33..072f23771 100755 --- a/fastdeploy/backends/lite/option.h +++ b/fastdeploy/backends/lite/option.h @@ -14,6 +14,7 @@ #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include @@ -21,6 +22,16 @@ #include namespace fastdeploy { +/*! Paddle Lite power mode for mobile device. */ +enum LitePowerMode { + LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode + LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode + LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode + LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode + LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode + LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode +}; + struct LiteBackendOption { // cpu num threads int threads = 1; diff --git a/fastdeploy/backends/openvino/option.h b/fastdeploy/backends/openvino/option.h index fa18d5ef9..e78a73496 100644 --- a/fastdeploy/backends/openvino/option.h +++ b/fastdeploy/backends/openvino/option.h @@ -14,6 +14,7 @@ #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/backends/ort/option.h b/fastdeploy/backends/ort/option.h index db58dbdd7..78f117b99 100644 --- a/fastdeploy/backends/ort/option.h +++ b/fastdeploy/backends/ort/option.h @@ -14,6 +14,7 @@ #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/backends/paddle/option.h b/fastdeploy/backends/paddle/option.h index 24fda8277..3f2d03ca0 100644 --- a/fastdeploy/backends/paddle/option.h +++ b/fastdeploy/backends/paddle/option.h @@ -14,6 +14,7 @@ #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/backends/paddle/paddle_backend.cc b/fastdeploy/backends/paddle/paddle_backend.cc index de2ac6223..f5340ed86 100644 --- a/fastdeploy/backends/paddle/paddle_backend.cc +++ b/fastdeploy/backends/paddle/paddle_backend.cc @@ -31,6 +31,8 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) { config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_); auto precision = paddle_infer::PrecisionType::kFloat32; if (option.trt_option.enable_fp16) { + FDINFO << "Will try to use tensorrt fp16 inference with Paddle Backend." 
+ << std::endl; precision = paddle_infer::PrecisionType::kHalf; } bool use_static = false; diff --git a/fastdeploy/backends/poros/option.h b/fastdeploy/backends/poros/option.h index 4d9a11a07..2b715f7dc 100755 --- a/fastdeploy/backends/poros/option.h +++ b/fastdeploy/backends/poros/option.h @@ -14,6 +14,7 @@ #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h b/fastdeploy/backends/rknpu2/option.h similarity index 100% rename from fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h rename to fastdeploy/backends/rknpu2/option.h diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc b/fastdeploy/backends/rknpu2/rknpu2_backend.cc similarity index 99% rename from fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc rename to fastdeploy/backends/rknpu2/rknpu2_backend.cc index 94a6d42d3..bcb892fb6 100644 --- a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc +++ b/fastdeploy/backends/rknpu2/rknpu2_backend.cc @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h" +#include "fastdeploy/backends/rknpu2/rknpu2_backend.h" #include "fastdeploy/utils/perf.h" namespace fastdeploy { RKNPU2Backend::~RKNPU2Backend() { @@ -478,4 +478,4 @@ RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) { FDERROR << "rknn_tensor_type don't support this type" << std::endl; return RKNN_TENSOR_TYPE_MAX; } -} // namespace fastdeploy \ No newline at end of file +} // namespace fastdeploy diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h b/fastdeploy/backends/rknpu2/rknpu2_backend.h similarity index 98% rename from fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h rename to fastdeploy/backends/rknpu2/rknpu2_backend.h index 33704679c..5482c4758 100644 --- a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h +++ b/fastdeploy/backends/rknpu2/rknpu2_backend.h @@ -14,7 +14,7 @@ #pragma once #include "fastdeploy/backends/backend.h" -#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h" +#include "fastdeploy/backends/rknpu2/option.h" #include "fastdeploy/core/fd_tensor.h" #include "rknn_api.h" // NOLINT #include diff --git a/fastdeploy/backends/sophgo/option.h b/fastdeploy/backends/sophgo/option.h index 320cb7ae2..f4339c32f 100644 --- a/fastdeploy/backends/sophgo/option.h +++ b/fastdeploy/backends/sophgo/option.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/backends/tensorrt/option.h b/fastdeploy/backends/tensorrt/option.h index 3f7c2a208..94ec010d0 100755 --- a/fastdeploy/backends/tensorrt/option.h +++ b/fastdeploy/backends/tensorrt/option.h @@ -13,6 +13,7 @@ // limitations under the License. 
#pragma once +#include "fastdeploy/core/fd_type.h" #include #include #include diff --git a/fastdeploy/core/config.h.in b/fastdeploy/core/config.h.in index 21c398af0..c2541cc46 100755 --- a/fastdeploy/core/config.h.in +++ b/fastdeploy/core/config.h.in @@ -57,10 +57,6 @@ #cmakedefine ENABLE_TEXT #endif -#ifndef ENABLE_OPENCV_CUDA -#cmakedefine ENABLE_OPENCV_CUDA -#endif - #ifdef ENABLE_VISION #ifndef ENABLE_VISION_VISUALIZE #define ENABLE_VISION_VISUALIZE diff --git a/fastdeploy/core/fd_tensor.h b/fastdeploy/core/fd_tensor.h index c6e1ed5cb..5584f1b30 100644 --- a/fastdeploy/core/fd_tensor.h +++ b/fastdeploy/core/fd_tensor.h @@ -21,11 +21,11 @@ #include "fastdeploy/core/allocate.h" #include "fastdeploy/core/fd_scalar.h" #include "fastdeploy/core/fd_type.h" +#include "fastdeploy/runtime/enum_variables.h" namespace fastdeploy { struct FASTDEPLOY_DECL FDTensor { - // std::vector data; void* buffer_ = nullptr; std::vector shape = {0}; diff --git a/fastdeploy/core/fd_type.cc b/fastdeploy/core/fd_type.cc old mode 100755 new mode 100644 index 420e03ff7..17bc2cdb6 --- a/fastdeploy/core/fd_type.cc +++ b/fastdeploy/core/fd_type.cc @@ -44,70 +44,6 @@ int FDDataTypeSize(const FDDataType& data_type) { return -1; } -std::string Str(const Device& d) { - std::string out; - switch (d) { - case Device::CPU: - out = "Device::CPU"; - break; - case Device::GPU: - out = "Device::GPU"; - break; - case Device::RKNPU: - out = "Device::RKNPU"; - break; - case Device::SOPHGOTPUD: - out = "Device::SOPHGOTPUD"; - break; - case Device::IPU: - out = "Device::IPU"; - break; - case Device::TIMVX: - out = "Device::TIMVX"; - break; - case Device::ASCEND: - out = "Device::ASCEND"; - break; - case Device::KUNLUNXIN: - out = "Device::KUNLUNXIN"; - break; - default: - out = "Device::UNKOWN"; - } - return out; -} - -std::ostream& operator<<(std::ostream& out,const Device& d){ - switch (d) { - case Device::CPU: - out << "Device::CPU"; - break; - case Device::GPU: - out << "Device::GPU"; - break; - case Device::RKNPU: - out << "Device::RKNPU"; - break; - case Device::SOPHGOTPUD: - out << "Device::SOPHGOTPUD"; - break; - case Device::TIMVX: - out << "Device::TIMVX"; - break; - case Device::KUNLUNXIN: - out << "Device::KUNLUNXIN"; - break; - case Device::ASCEND: - out << "Device::ASCEND"; - break; - default: - out << "Device::UNKOWN"; - } - return out; -} - - - std::string Str(const FDDataType& fdt) { std::string out; switch (fdt) { @@ -144,37 +80,37 @@ std::string Str(const FDDataType& fdt) { return out; } -std::ostream& operator<<(std::ostream& out,const FDDataType& fdt){ +std::ostream& operator<<(std::ostream& out, const FDDataType& fdt) { switch (fdt) { - case FDDataType::BOOL: - out << "FDDataType::BOOL"; - break; - case FDDataType::INT16: - out << "FDDataType::INT16"; - break; - case FDDataType::INT32: - out << "FDDataType::INT32"; - break; - case FDDataType::INT64: - out << "FDDataType::INT64"; - break; - case FDDataType::FP32: - out << "FDDataType::FP32"; - break; - case FDDataType::FP64: - out << "FDDataType::FP64"; - break; - case FDDataType::FP16: - out << "FDDataType::FP16"; - break; - case FDDataType::UINT8: - out << "FDDataType::UINT8"; - break; - case FDDataType::INT8: - out << "FDDataType::INT8"; - break; - default: - out << "FDDataType::UNKNOWN"; + case FDDataType::BOOL: + out << "FDDataType::BOOL"; + break; + case FDDataType::INT16: + out << "FDDataType::INT16"; + break; + case FDDataType::INT32: + out << "FDDataType::INT32"; + break; + case FDDataType::INT64: + out << "FDDataType::INT64"; + break; + case 
FDDataType::FP32: + out << "FDDataType::FP32"; + break; + case FDDataType::FP64: + out << "FDDataType::FP64"; + break; + case FDDataType::FP16: + out << "FDDataType::FP16"; + break; + case FDDataType::UINT8: + out << "FDDataType::UINT8"; + break; + case FDDataType::INT8: + out << "FDDataType::INT8"; + break; + default: + out << "FDDataType::UNKNOWN"; } return out; } @@ -206,35 +142,4 @@ const FDDataType TypeToDataType::dtype = UINT8; template <> const FDDataType TypeToDataType::dtype = INT8; -std::string Str(const ModelFormat& f) { - if (f == ModelFormat::PADDLE) { - return "ModelFormat::PADDLE"; - } else if (f == ModelFormat::ONNX) { - return "ModelFormat::ONNX"; - } else if (f == ModelFormat::RKNN) { - return "ModelFormat::RKNN"; - } else if (f == ModelFormat::SOPHGO) { - return "ModelFormat::SOPHGO"; - } else if (f == ModelFormat::TORCHSCRIPT) { - return "ModelFormat::TORCHSCRIPT"; - } - return "UNKNOWN-ModelFormat"; -} - -std::ostream& operator<<(std::ostream& out, const ModelFormat& format) { - if (format == ModelFormat::PADDLE) { - out << "ModelFormat::PADDLE"; - } else if (format == ModelFormat::ONNX) { - out << "ModelFormat::ONNX"; - } else if (format == ModelFormat::RKNN) { - out << "ModelFormat::RKNN"; - } else if (format == ModelFormat::SOPHGO) { - out << "ModelFormat::SOPHGO"; - } else if (format == ModelFormat::TORCHSCRIPT) { - out << "ModelFormat::TORCHSCRIPT"; - } - out << "UNKNOWN-ModelFormat"; - return out; -} - } // namespace fastdeploy diff --git a/fastdeploy/core/fd_type.h b/fastdeploy/core/fd_type.h index 5b49f1e86..2782ecf0b 100755 --- a/fastdeploy/core/fd_type.h +++ b/fastdeploy/core/fd_type.h @@ -22,11 +22,6 @@ namespace fastdeploy { -enum FASTDEPLOY_DECL Device {CPU, GPU, RKNPU, IPU, TIMVX, KUNLUNXIN, ASCEND, - SOPHGOTPUD}; - -FASTDEPLOY_DECL std::string Str(const Device& d); - enum FASTDEPLOY_DECL FDDataType { BOOL, INT16, @@ -52,7 +47,6 @@ enum FASTDEPLOY_DECL FDDataType { INT8 }; -FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d); FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const FDDataType& fdt); @@ -66,17 +60,4 @@ struct FASTDEPLOY_DECL TypeToDataType { static const FDDataType dtype; }; -/*! 
Deep learning model format */ -enum ModelFormat { - AUTOREC, ///< Auto recognize the model format by model file name - PADDLE, ///< Model with paddlepaddle format - ONNX, ///< Model with ONNX format - RKNN, ///< Model with RKNN format - TORCHSCRIPT, ///< Model with TorchScript format - SOPHGO, ///< Model with SOPHGO format -}; - -FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, - const ModelFormat& format); - } // namespace fastdeploy diff --git a/fastdeploy/fastdeploy_model.h b/fastdeploy/fastdeploy_model.h index 9b78c3d3f..698827cc2 100755 --- a/fastdeploy/fastdeploy_model.h +++ b/fastdeploy/fastdeploy_model.h @@ -121,9 +121,7 @@ class FASTDEPLOY_DECL FastDeployModel { std::vector().swap(reused_output_tensors_); } - virtual fastdeploy::Runtime* CloneRuntime() { - return runtime_->Clone(); - } + virtual fastdeploy::Runtime* CloneRuntime() { return runtime_->Clone(); } virtual bool SetRuntime(fastdeploy::Runtime* clone_runtime) { runtime_ = std::unique_ptr(clone_runtime); diff --git a/fastdeploy/pybind/main.h b/fastdeploy/pybind/main.h index de817bb73..b80aeaca4 100755 --- a/fastdeploy/pybind/main.h +++ b/fastdeploy/pybind/main.h @@ -21,7 +21,7 @@ #include -#include "fastdeploy/runtime.h" +#include "fastdeploy/runtime/runtime.h" #ifdef ENABLE_VISION #include "fastdeploy/vision.h" diff --git a/fastdeploy/pybind/rknpu2_config_pybind.cc b/fastdeploy/pybind/rknpu2_config_pybind.cc index 4880b2db6..c7ce47553 100644 --- a/fastdeploy/pybind/rknpu2_config_pybind.cc +++ b/fastdeploy/pybind/rknpu2_config_pybind.cc @@ -11,23 +11,27 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h" +#include "fastdeploy/backends/rknpu2/option.h" #include "fastdeploy/pybind/main.h" namespace fastdeploy { void BindRKNPU2Config(pybind11::module& m) { - pybind11::enum_(m, "CpuName", pybind11::arithmetic(), - "CpuName for inference.") + pybind11::enum_( + m, "CpuName", pybind11::arithmetic(), "CpuName for inference.") .value("RK356X", fastdeploy::rknpu2::CpuName::RK356X) .value("RK3588", fastdeploy::rknpu2::CpuName::RK3588) .value("UNDEFINED", fastdeploy::rknpu2::CpuName::UNDEFINED); - pybind11::enum_(m, "CoreMask", pybind11::arithmetic(), - "CoreMask for inference.") - .value("RKNN_NPU_CORE_AUTO", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO) + pybind11::enum_( + m, "CoreMask", pybind11::arithmetic(), "CoreMask for inference.") + .value("RKNN_NPU_CORE_AUTO", + fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO) .value("RKNN_NPU_CORE_0", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0) .value("RKNN_NPU_CORE_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_1) .value("RKNN_NPU_CORE_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_2) - .value("RKNN_NPU_CORE_0_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1) - .value("RKNN_NPU_CORE_0_1_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2) - .value("RKNN_NPU_CORE_UNDEFINED", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED); + .value("RKNN_NPU_CORE_0_1", + fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1) + .value("RKNN_NPU_CORE_0_1_2", + fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2) + .value("RKNN_NPU_CORE_UNDEFINED", + fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED); } -} // namespace fastdeploy \ No newline at end of file +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc deleted file mode 
100755 index 6c1949ed3..000000000 --- a/fastdeploy/runtime.cc +++ /dev/null @@ -1,1012 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/runtime.h" - -#include "fastdeploy/utils/unique_ptr.h" -#include "fastdeploy/utils/utils.h" - -#ifdef ENABLE_ORT_BACKEND -#include "fastdeploy/backends/ort/ort_backend.h" -#endif - -#ifdef ENABLE_TRT_BACKEND -#include "fastdeploy/backends/tensorrt/trt_backend.h" -#endif - -#ifdef ENABLE_PADDLE_BACKEND -#include "fastdeploy/backends/paddle/paddle_backend.h" -#endif - -#ifdef ENABLE_POROS_BACKEND -#include "fastdeploy/backends/poros/poros_backend.h" -#endif - -#ifdef ENABLE_OPENVINO_BACKEND -#include "fastdeploy/backends/openvino/ov_backend.h" -#endif - -#ifdef ENABLE_LITE_BACKEND -#include "fastdeploy/backends/lite/lite_backend.h" -#endif - -#ifdef ENABLE_RKNPU2_BACKEND -#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h" -#endif - -#ifdef ENABLE_SOPHGO_BACKEND -#include "fastdeploy/backends/sophgo/sophgo_backend.h" -#endif - -namespace fastdeploy { - -std::vector GetAvailableBackends() { - std::vector backends; -#ifdef ENABLE_ORT_BACKEND - backends.push_back(Backend::ORT); -#endif -#ifdef ENABLE_TRT_BACKEND - backends.push_back(Backend::TRT); -#endif -#ifdef ENABLE_PADDLE_BACKEND - backends.push_back(Backend::PDINFER); -#endif -#ifdef ENABLE_POROS_BACKEND - backends.push_back(Backend::POROS); -#endif -#ifdef ENABLE_OPENVINO_BACKEND - backends.push_back(Backend::OPENVINO); -#endif -#ifdef ENABLE_LITE_BACKEND - backends.push_back(Backend::LITE); -#endif -#ifdef ENABLE_RKNPU2_BACKEND - backends.push_back(Backend::RKNPU2); -#endif -#ifdef ENABLE_SOPHGO_BACKEND - backends.push_back(Backend::SOPHGOTPU); -#endif - return backends; -} - -bool IsBackendAvailable(const Backend& backend) { - std::vector backends = GetAvailableBackends(); - for (size_t i = 0; i < backends.size(); ++i) { - if (backend == backends[i]) { - return true; - } - } - return false; -} - -std::string Str(const Backend& b) { - if (b == Backend::ORT) { - return "Backend::ORT"; - } else if (b == Backend::TRT) { - return "Backend::TRT"; - } else if (b == Backend::PDINFER) { - return "Backend::PDINFER"; - } else if (b == Backend::POROS) { - return "Backend::POROS"; - } else if (b == Backend::RKNPU2) { - return "Backend::RKNPU2"; - } else if (b == Backend::SOPHGOTPU) { - return "Backend::SOPHGOTPU"; - } else if (b == Backend::OPENVINO) { - return "Backend::OPENVINO"; - } else if (b == Backend::LITE) { - return "Backend::PDLITE"; - } - return "UNKNOWN-Backend"; -} - -std::ostream& operator<<(std::ostream& out, const Backend& backend) { - if (backend == Backend::ORT) { - out << "Backend::ORT"; - } else if (backend == Backend::TRT) { - out << "Backend::TRT"; - } else if (backend == Backend::PDINFER) { - out << "Backend::PDINFER"; - } else if (backend == Backend::OPENVINO) { - out << "Backend::OPENVINO"; - } else if (backend == Backend::RKNPU2) { - out << "Backend::RKNPU2"; - } else if 
(backend == Backend::SOPHGOTPU) { - out << "Backend::SOPHGOTPU"; - } else if (backend == Backend::POROS) { - out << "Backend::POROS"; - } else if (backend == Backend::LITE) { - out << "Backend::PDLITE"; - } else { - out << "UNKNOWN-Backend"; - } - return out; -} - -bool CheckModelFormat(const std::string& model_file, - const ModelFormat& model_format) { - if (model_format == ModelFormat::PADDLE) { - if (model_file.size() < 8 || - model_file.substr(model_file.size() - 8, 8) != ".pdmodel") { - FDERROR << "With model format of ModelFormat::PADDLE, the model file " - "should ends with `.pdmodel`, but now it's " - << model_file << std::endl; - return false; - } - } else if (model_format == ModelFormat::ONNX) { - if (model_file.size() < 5 || - model_file.substr(model_file.size() - 5, 5) != ".onnx") { - FDERROR << "With model format of ModelFormat::ONNX, the model file " - "should ends with `.onnx`, but now it's " - << model_file << std::endl; - return false; - } - } else if (model_format == ModelFormat::RKNN) { - if (model_file.size() < 5 || - model_file.substr(model_file.size() - 5, 5) != ".rknn") { - FDERROR << "With model format of ModelFormat::RKNN, the model file " - "should ends with `.rknn`, but now it's " - << model_file << std::endl; - return false; - } - } else if (model_format == ModelFormat::TORCHSCRIPT) { - if (model_file.size() < 3 || - model_file.substr(model_file.size() - 3, 3) != ".pt") { - FDERROR - << "With model format of ModelFormat::TORCHSCRIPT, the model file " - "should ends with `.pt`, but now it's " - << model_file << std::endl; - return false; - } - } else if (model_format == ModelFormat::SOPHGO) { - if (model_file.size() < 7 || - model_file.substr(model_file.size() -7, 7) != ".bmodel") { - FDERROR - << "With model format of ModelFormat::SOPHGO, the model file " - "should ends with `.bmodel`, but now it's " - << model_file << std::endl; - return false; - } - } else { - FDERROR - << "Only support model format with frontend ModelFormat::PADDLE / " - "ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT." - << std::endl; - return false; - } - return true; -} - -ModelFormat GuessModelFormat(const std::string& model_file) { - if (model_file.size() > 8 && - model_file.substr(model_file.size() - 8, 8) == ".pdmodel") { - FDINFO << "Model Format: PaddlePaddle." << std::endl; - return ModelFormat::PADDLE; - } else if (model_file.size() > 5 && - model_file.substr(model_file.size() - 5, 5) == ".onnx") { - FDINFO << "Model Format: ONNX." << std::endl; - return ModelFormat::ONNX; - } else if (model_file.size() > 3 && - model_file.substr(model_file.size() - 3, 3) == ".pt") { - FDINFO << "Model Format: Torchscript." << std::endl; - return ModelFormat::TORCHSCRIPT; - } else if (model_file.size() > 5 && - model_file.substr(model_file.size() - 5, 5) == ".rknn") { - FDINFO << "Model Format: RKNN." << std::endl; - return ModelFormat::RKNN; - } else if (model_file.size() > 7 && - model_file.substr(model_file.size() - 7, 7) == ".bmodel") { - FDINFO << "Model Format: SOPHGO." << std::endl; - return ModelFormat::SOPHGO; - } - - FDERROR << "Cannot guess which model format you are using, please set " - "RuntimeOption::model_format manually." 
- << std::endl; - return ModelFormat::PADDLE; -} - -void RuntimeOption::SetModelPath(const std::string& model_path, - const std::string& params_path, - const ModelFormat& format) { - if (format == ModelFormat::PADDLE) { - model_file = model_path; - params_file = params_path; - model_format = ModelFormat::PADDLE; - } else if (format == ModelFormat::ONNX) { - model_file = model_path; - model_format = ModelFormat::ONNX; - } else if (format == ModelFormat::TORCHSCRIPT) { - model_file = model_path; - model_format = ModelFormat::TORCHSCRIPT; - } else { - FDASSERT(false, - "The model format only can be " - "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT."); - } -} - -void RuntimeOption::SetModelBuffer(const char * model_buffer, - size_t model_buffer_size, - const char * params_buffer, - size_t params_buffer_size, - const ModelFormat& format) { - model_buffer_size_ = model_buffer_size; - params_buffer_size_ = params_buffer_size; - model_from_memory_ = true; - if (format == ModelFormat::PADDLE) { - model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size); - params_buffer_ = std::string(params_buffer, params_buffer + params_buffer_size); - model_format = ModelFormat::PADDLE; - } else if (format == ModelFormat::ONNX) { - model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size); - model_format = ModelFormat::ONNX; - } else if (format == ModelFormat::TORCHSCRIPT) { - model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size); - model_format = ModelFormat::TORCHSCRIPT; - } else { - FDASSERT(false, - "The model format only can be " - "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT."); - } -} - -void RuntimeOption::UseGpu(int gpu_id) { -#ifdef WITH_GPU - device = Device::GPU; - device_id = gpu_id; -#else - FDWARNING << "The FastDeploy didn't compile with GPU, will force to use CPU." 
- << std::endl; - device = Device::CPU; -#endif -} - -void RuntimeOption::UseCpu() { device = Device::CPU; } - -void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name, - fastdeploy::rknpu2::CoreMask rknpu2_core) { - rknpu2_cpu_name_ = rknpu2_name; - rknpu2_core_mask_ = rknpu2_core; - device = Device::RKNPU; -} - -void RuntimeOption::UseTimVX() { - enable_timvx = true; - device = Device::TIMVX; -} - -void RuntimeOption::UseKunlunXin(int kunlunxin_id, - int l3_workspace_size, - bool locked, - bool autotune, - const std::string &autotune_file, - const std::string &precision, - bool adaptive_seqlen, - bool enable_multi_stream) { - enable_kunlunxin = true; - device_id = kunlunxin_id; - kunlunxin_l3_workspace_size = l3_workspace_size; - kunlunxin_locked=locked; - kunlunxin_autotune=autotune; - kunlunxin_autotune_file=autotune_file; - kunlunxin_precision = precision; - kunlunxin_adaptive_seqlen=adaptive_seqlen; - kunlunxin_enable_multi_stream=enable_multi_stream; - device = Device::KUNLUNXIN; -} - -void RuntimeOption::UseAscend(){ - enable_ascend = true; - device = Device::ASCEND; -} - -void RuntimeOption::UseSophgo() { - device = Device::SOPHGOTPUD; - UseSophgoBackend(); -} - -void RuntimeOption::SetExternalStream(void* external_stream) { - external_stream_ = external_stream; -} - -void RuntimeOption::SetCpuThreadNum(int thread_num) { - FDASSERT(thread_num > 0, "The thread_num must be greater than 0."); - cpu_thread_num = thread_num; -} - -void RuntimeOption::SetOrtGraphOptLevel(int level) { - std::vector supported_level{-1, 0, 1, 2}; - auto valid_level = std::find(supported_level.begin(), supported_level.end(), - level) != supported_level.end(); - FDASSERT(valid_level, "The level must be -1, 0, 1, 2."); - ort_graph_opt_level = level; -} - -// use paddle inference backend -void RuntimeOption::UsePaddleBackend() { -#ifdef ENABLE_PADDLE_BACKEND - backend = Backend::PDINFER; -#else - FDASSERT(false, "The FastDeploy didn't compile with Paddle Inference."); -#endif -} - -// use onnxruntime backend -void RuntimeOption::UseOrtBackend() { -#ifdef ENABLE_ORT_BACKEND - backend = Backend::ORT; -#else - FDASSERT(false, "The FastDeploy didn't compile with OrtBackend."); -#endif -} - -// use sophgoruntime backend -void RuntimeOption::UseSophgoBackend() { -#ifdef ENABLE_SOPHGO_BACKEND - backend = Backend::SOPHGOTPU; -#else - FDASSERT(false, "The FastDeploy didn't compile with SophgoBackend."); -#endif -} - -// use poros backend -void RuntimeOption::UsePorosBackend() { -#ifdef ENABLE_POROS_BACKEND - backend = Backend::POROS; -#else - FDASSERT(false, "The FastDeploy didn't compile with PorosBackend."); -#endif -} - -void RuntimeOption::UseTrtBackend() { -#ifdef ENABLE_TRT_BACKEND - backend = Backend::TRT; -#else - FDASSERT(false, "The FastDeploy didn't compile with TrtBackend."); -#endif -} - -void RuntimeOption::UseOpenVINOBackend() { -#ifdef ENABLE_OPENVINO_BACKEND - backend = Backend::OPENVINO; -#else - FDASSERT(false, "The FastDeploy didn't compile with OpenVINO."); -#endif -} - -void RuntimeOption::UseLiteBackend() { -#ifdef ENABLE_LITE_BACKEND - backend = Backend::LITE; -#else - FDASSERT(false, "The FastDeploy didn't compile with Paddle Lite."); -#endif -} - -void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) { - pd_enable_mkldnn = pd_mkldnn; -} - -void RuntimeOption::DeletePaddleBackendPass(const std::string& pass_name) { - pd_delete_pass_names.push_back(pass_name); -} -void RuntimeOption::EnablePaddleLogInfo() { pd_enable_log_info = true; } - -void RuntimeOption::DisablePaddleLogInfo() { 
pd_enable_log_info = false; } - -void RuntimeOption::EnablePaddleToTrt() { - FDASSERT(backend == Backend::TRT, - "Should call UseTrtBackend() before call EnablePaddleToTrt()."); -#ifdef ENABLE_PADDLE_BACKEND - FDINFO << "While using TrtBackend with EnablePaddleToTrt, FastDeploy will " - "change to use Paddle Inference Backend." - << std::endl; - backend = Backend::PDINFER; - pd_enable_trt = true; -#else - FDASSERT(false, "While using TrtBackend with EnablePaddleToTrt, require the " - "FastDeploy is compiled with Paddle Inference Backend, " - "please rebuild your FastDeploy."); -#endif -} - -void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) { - FDASSERT(size > 0, "Parameter size must greater than 0."); - pd_mkldnn_cache_size = size; -} - -void RuntimeOption::SetOpenVINODevice(const std::string& name) { - openvino_device = name; -} - -void RuntimeOption::EnableLiteFP16() { lite_enable_fp16 = true; } - -void RuntimeOption::DisableLiteFP16() { lite_enable_fp16 = false; } -void RuntimeOption::EnableLiteInt8() { lite_enable_int8 = true; } - -void RuntimeOption::DisableLiteInt8() { lite_enable_int8 = false; } -void RuntimeOption::SetLitePowerMode(LitePowerMode mode) { - lite_power_mode = mode; -} - -void RuntimeOption::SetLiteOptimizedModelDir( - const std::string& optimized_model_dir) { - lite_optimized_model_dir = optimized_model_dir; -} - -void RuntimeOption::SetLiteSubgraphPartitionPath( - const std::string& nnadapter_subgraph_partition_config_path) { - lite_nnadapter_subgraph_partition_config_path = - nnadapter_subgraph_partition_config_path; -} - -void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer( - const std::string& nnadapter_subgraph_partition_config_buffer){ - lite_nnadapter_subgraph_partition_config_buffer = nnadapter_subgraph_partition_config_buffer; -} - -void RuntimeOption::SetLiteDeviceNames(const std::vector& nnadapter_device_names){ - lite_nnadapter_device_names = nnadapter_device_names; -} - -void RuntimeOption::SetLiteContextProperties(const std::string& nnadapter_context_properties){ - lite_nnadapter_context_properties = nnadapter_context_properties; -} - -void RuntimeOption::SetLiteModelCacheDir(const std::string& nnadapter_model_cache_dir){ - lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir; -} - - -void RuntimeOption::SetLiteDynamicShapeInfo( - const std::map>>& - nnadapter_dynamic_shape_info){ - lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info; -} - -void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath( - const std::string& nnadapter_mixed_precision_quantization_config_path){ - lite_nnadapter_mixed_precision_quantization_config_path = nnadapter_mixed_precision_quantization_config_path; -} - - -void RuntimeOption::SetTrtInputShape(const std::string& input_name, - const std::vector& min_shape, - const std::vector& opt_shape, - const std::vector& max_shape) { - trt_min_shape[input_name].clear(); - trt_max_shape[input_name].clear(); - trt_opt_shape[input_name].clear(); - trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end()); - if (opt_shape.size() == 0) { - trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end()); - } else { - trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end()); - } - if (max_shape.size() == 0) { - trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end()); - } else { - trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end()); - } -} - -void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) { - trt_max_workspace_size = 
max_workspace_size; -} -void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size) { - trt_max_batch_size = max_batch_size; -} - -void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; } - -void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; } - -void RuntimeOption::EnablePinnedMemory() { enable_pinned_memory = true; } - -void RuntimeOption::DisablePinnedMemory() { enable_pinned_memory = false; } - -void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) { - trt_serialize_file = cache_file_path; -} - -void RuntimeOption::SetOpenVINOStreams(int num_streams) { - ov_num_streams = num_streams; -} - -bool Runtime::Compile(std::vector>& prewarm_tensors, - const RuntimeOption& _option) { -#ifdef ENABLE_POROS_BACKEND - option = _option; - auto poros_option = PorosBackendOption(); - poros_option.use_gpu = (option.device == Device::GPU) ? true : false; - poros_option.gpu_id = option.device_id; - poros_option.long_to_int = option.long_to_int; - poros_option.use_nvidia_tf32 = option.use_nvidia_tf32; - poros_option.unconst_ops_thres = option.unconst_ops_thres; - poros_option.poros_file = option.poros_file; - poros_option.is_dynamic = option.is_dynamic; - poros_option.enable_fp16 = option.trt_enable_fp16; - poros_option.max_batch_size = option.trt_max_batch_size; - poros_option.max_workspace_size = option.trt_max_workspace_size; - FDASSERT( - option.model_format == ModelFormat::TORCHSCRIPT, - "PorosBackend only support model format of ModelFormat::TORCHSCRIPT."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - FDASSERT( - casted_backend->Compile(option.model_file, prewarm_tensors, poros_option), - "Load model from Torchscript failed while initliazing PorosBackend."); -#else - FDASSERT(false, "PorosBackend is not available, please compiled with " - "ENABLE_POROS_BACKEND=ON."); -#endif - return true; -} - -void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; } - -void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; } - -void RuntimeOption::DisablePaddleTrtOPs(const std::vector& ops) { - trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end()); -} - -void RuntimeOption::UseIpu(int device_num, int micro_batch_size, - bool enable_pipelining, int batches_per_step) { -#ifdef WITH_IPU - device = Device::IPU; - ipu_device_num = device_num; - ipu_micro_batch_size = micro_batch_size; - ipu_enable_pipelining = enable_pipelining; - ipu_batches_per_step = batches_per_step; -#else - FDWARNING << "The FastDeploy didn't compile with IPU, will force to use CPU." 
- << std::endl; - device = Device::CPU; -#endif -} - -void RuntimeOption::SetIpuConfig(bool enable_fp16, int replica_num, - float available_memory_proportion, - bool enable_half_partial) { - ipu_enable_fp16 = enable_fp16; - ipu_replica_num = replica_num; - ipu_available_memory_proportion = available_memory_proportion; - ipu_enable_half_partial = enable_half_partial; -} - -bool Runtime::Init(const RuntimeOption& _option) { - option = _option; - if (option.model_format == ModelFormat::AUTOREC) { - option.model_format = GuessModelFormat(_option.model_file); - } - if (option.backend == Backend::UNKNOWN) { - if (IsBackendAvailable(Backend::ORT)) { - option.backend = Backend::ORT; - } else if (IsBackendAvailable(Backend::PDINFER)) { - option.backend = Backend::PDINFER; - } else if (IsBackendAvailable(Backend::POROS)) { - option.backend = Backend::POROS; - } else if (IsBackendAvailable(Backend::OPENVINO)) { - option.backend = Backend::OPENVINO; - } else if (IsBackendAvailable(Backend::RKNPU2)) { - option.backend = Backend::RKNPU2; - } else if (IsBackendAvailable(Backend::SOPHGOTPU)) { - option.backend = Backend::SOPHGOTPU; - } else { - FDERROR << "Please define backend in RuntimeOption, current it's " - "Backend::UNKNOWN." - << std::endl; - return false; - } - } - - if (option.backend == Backend::ORT) { - FDASSERT(option.device == Device::CPU || option.device == Device::GPU, - "Backend::ORT only supports Device::CPU/Device::GPU."); - CreateOrtBackend(); - FDINFO << "Runtime initialized with Backend::ORT in " << Str(option.device) - << "." << std::endl; - } else if (option.backend == Backend::TRT) { - FDASSERT(option.device == Device::GPU, - "Backend::TRT only supports Device::GPU."); - CreateTrtBackend(); - FDINFO << "Runtime initialized with Backend::TRT in " << Str(option.device) - << "." << std::endl; - } else if (option.backend == Backend::PDINFER) { - FDASSERT( - option.device == Device::CPU || option.device == Device::GPU || - option.device == Device::IPU, - "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU."); - FDASSERT( - option.model_format == ModelFormat::PADDLE, - "Backend::PDINFER only supports model format of ModelFormat::PADDLE."); - CreatePaddleBackend(); - FDINFO << "Runtime initialized with Backend::PDINFER in " - << Str(option.device) << "." << std::endl; - } else if (option.backend == Backend::POROS) { - FDASSERT(option.device == Device::CPU || option.device == Device::GPU, - "Backend::POROS only supports Device::CPU/Device::GPU."); - FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT, - "Backend::POROS only supports model format of " - "ModelFormat::TORCHSCRIPT."); - FDINFO << "Runtime initialized with Backend::POROS in " - << Str(option.device) << "." << std::endl; - return true; - } else if (option.backend == Backend::OPENVINO) { - FDASSERT(option.device == Device::CPU, - "Backend::OPENVINO only supports Device::CPU"); - CreateOpenVINOBackend(); - FDINFO << "Runtime initialized with Backend::OPENVINO in " - << Str(option.device) << "." << std::endl; - } else if (option.backend == Backend::LITE) { - FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX || option.device == Device::KUNLUNXIN || option.device == Device::ASCEND, - "Backend::LITE only supports Device::CPU/Device::TIMVX/Device::KUNLUNXIN."); - CreateLiteBackend(); - FDINFO << "Runtime initialized with Backend::LITE in " << Str(option.device) - << "." 
<< std::endl; - } else if (option.backend == Backend::RKNPU2) { - FDASSERT(option.device == Device::RKNPU, - "Backend::RKNPU2 only supports Device::RKNPU2"); - CreateRKNPU2Backend(); - - FDINFO << "Runtime initialized with Backend::RKNPU2 in " - << Str(option.device) << "." << std::endl; - } else if (option.backend == Backend::SOPHGOTPU) { - FDASSERT(option.device == Device::SOPHGOTPUD, - "Backend::SOPHGO only supports Device::SOPHGO"); - CreateSophgoNPUBackend(); - - FDINFO << "Runtime initialized with Backend::SOPHGO in " - << Str(option.device) << "." << std::endl; - } - else { - FDERROR << "Runtime only support " - "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as " - "backend now." - << std::endl; - return false; - } - return true; -} - -TensorInfo Runtime::GetInputInfo(int index) { - return backend_->GetInputInfo(index); -} - -TensorInfo Runtime::GetOutputInfo(int index) { - return backend_->GetOutputInfo(index); -} - -std::vector Runtime::GetInputInfos() { - return backend_->GetInputInfos(); -} - -std::vector Runtime::GetOutputInfos() { - return backend_->GetOutputInfos(); -} - -bool Runtime::Infer(std::vector& input_tensors, - std::vector* output_tensors) { - for (auto& tensor : input_tensors) { - FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id, - "Device id of input tensor(%d) and runtime(%d) are not same.", - tensor.device_id, option.device_id); - } - return backend_->Infer(input_tensors, output_tensors); -} - -bool Runtime::Infer() { - bool result = backend_->Infer(input_tensors_, &output_tensors_, false); - for (auto& tensor : output_tensors_) { - tensor.device_id = option.device_id; - } - return result; -} - -void Runtime::BindInputTensor(const std::string& name, FDTensor& input) { - bool is_exist = false; - for (auto& t : input_tensors_) { - if (t.name == name) { - is_exist = true; - t.SetExternalData(input.shape, input.dtype, input.MutableData(), - input.device, input.device_id); - break; - } - } - if (!is_exist) { - FDTensor new_tensor(name); - new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(), - input.device, input.device_id); - input_tensors_.emplace_back(std::move(new_tensor)); - } -} - -FDTensor* Runtime::GetOutputTensor(const std::string& name) { - for (auto& t : output_tensors_) { - if (t.name == name) { - return &t; - } - } - FDWARNING << "The output name [" << name << "] don't exist." << std::endl; - return nullptr; -} - -void Runtime::CreatePaddleBackend() { -#ifdef ENABLE_PADDLE_BACKEND - auto pd_option = PaddleBackendOption(); - pd_option.model_file = option.model_file; - pd_option.params_file = option.params_file; - pd_option.enable_mkldnn = option.pd_enable_mkldnn; - pd_option.enable_log_info = option.pd_enable_log_info; - pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size; - pd_option.use_gpu = (option.device == Device::GPU) ? true : false; - pd_option.use_ipu = (option.device == Device::IPU) ? 
true : false; - pd_option.gpu_id = option.device_id; - pd_option.delete_pass_names = option.pd_delete_pass_names; - pd_option.cpu_thread_num = option.cpu_thread_num; - pd_option.enable_pinned_memory = option.enable_pinned_memory; - pd_option.external_stream_ = option.external_stream_; - pd_option.model_from_memory_ = option.model_from_memory_; - if (pd_option.model_from_memory_) { - pd_option.model_buffer_ = option.model_buffer_; - pd_option.params_buffer_ = option.params_buffer_; - pd_option.model_buffer_size_ = option.model_buffer_size_; - pd_option.params_buffer_size_ = option.params_buffer_size_; - } -#ifdef ENABLE_TRT_BACKEND - if (pd_option.use_gpu && option.pd_enable_trt) { - pd_option.enable_trt = true; - pd_option.collect_shape = option.pd_collect_shape; - auto trt_option = TrtBackendOption(); - trt_option.gpu_id = option.device_id; - trt_option.enable_fp16 = option.trt_enable_fp16; - trt_option.max_batch_size = option.trt_max_batch_size; - trt_option.max_workspace_size = option.trt_max_workspace_size; - trt_option.max_shape = option.trt_max_shape; - trt_option.min_shape = option.trt_min_shape; - trt_option.opt_shape = option.trt_opt_shape; - trt_option.serialize_file = option.trt_serialize_file; - trt_option.enable_pinned_memory = option.enable_pinned_memory; - pd_option.trt_option = trt_option; - pd_option.trt_disabled_ops_ = option.trt_disabled_ops_; - } -#endif -#ifdef WITH_IPU - if (pd_option.use_ipu) { - auto ipu_option = IpuOption(); - ipu_option.ipu_device_num = option.ipu_device_num; - ipu_option.ipu_micro_batch_size = option.ipu_micro_batch_size; - ipu_option.ipu_enable_pipelining = option.ipu_enable_pipelining; - ipu_option.ipu_batches_per_step = option.ipu_batches_per_step; - ipu_option.ipu_enable_fp16 = option.ipu_enable_fp16; - ipu_option.ipu_replica_num = option.ipu_replica_num; - ipu_option.ipu_available_memory_proportion = - option.ipu_available_memory_proportion; - ipu_option.ipu_enable_half_partial = option.ipu_enable_half_partial; - pd_option.ipu_option = ipu_option; - } -#endif - FDASSERT(option.model_format == ModelFormat::PADDLE, - "PaddleBackend only support model format of ModelFormat::PADDLE."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - if (pd_option.model_from_memory_) { - FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_, option.params_buffer_, - pd_option), - "Load model from Paddle failed while initliazing PaddleBackend."); - } else { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file, - pd_option), - "Load model from Paddle failed while initliazing PaddleBackend."); - } -#else - FDASSERT(false, "PaddleBackend is not available, please compiled with " - "ENABLE_PADDLE_BACKEND=ON."); -#endif -} - -void Runtime::CreateOpenVINOBackend() { -#ifdef ENABLE_OPENVINO_BACKEND - auto ov_option = OpenVINOBackendOption(); - ov_option.cpu_thread_num = option.cpu_thread_num; - ov_option.device = option.openvino_device; - ov_option.shape_infos = option.ov_shape_infos; - ov_option.num_streams = option.ov_num_streams; - for (const auto& op : option.ov_cpu_operators) { - ov_option.cpu_operators.insert(op); - } - FDASSERT(option.model_format == ModelFormat::PADDLE || - option.model_format == ModelFormat::ONNX, - "OpenVINOBackend only support model format of ModelFormat::PADDLE / " - "ModelFormat::ONNX."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - - if (option.model_format == ModelFormat::ONNX) { - 
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ov_option), - "Load model from ONNX failed while initliazing OrtBackend."); - } else { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, - option.params_file, ov_option), - "Load model from Paddle failed while initliazing OrtBackend."); - } -#else - FDASSERT(false, "OpenVINOBackend is not available, please compiled with " - "ENABLE_OPENVINO_BACKEND=ON."); -#endif -} - -void Runtime::CreateOrtBackend() { -#ifdef ENABLE_ORT_BACKEND - auto ort_option = OrtBackendOption(); - ort_option.graph_optimization_level = option.ort_graph_opt_level; - ort_option.intra_op_num_threads = option.cpu_thread_num; - ort_option.inter_op_num_threads = option.ort_inter_op_num_threads; - ort_option.execution_mode = option.ort_execution_mode; - ort_option.use_gpu = (option.device == Device::GPU) ? true : false; - ort_option.gpu_id = option.device_id; - ort_option.external_stream_ = option.external_stream_; - - FDASSERT(option.model_format == ModelFormat::PADDLE || - option.model_format == ModelFormat::ONNX, - "OrtBackend only support model format of ModelFormat::PADDLE / " - "ModelFormat::ONNX."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - if (option.model_format == ModelFormat::ONNX) { - FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option), - "Load model from ONNX failed while initliazing OrtBackend."); - } else { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, - option.params_file, ort_option), - "Load model from Paddle failed while initliazing OrtBackend."); - } -#else - FDASSERT(false, "OrtBackend is not available, please compiled with " - "ENABLE_ORT_BACKEND=ON."); -#endif -} - -void Runtime::CreateTrtBackend() { -#ifdef ENABLE_TRT_BACKEND - auto trt_option = TrtBackendOption(); - trt_option.model_file = option.model_file; - trt_option.params_file = option.params_file; - trt_option.model_format = option.model_format; - trt_option.gpu_id = option.device_id; - trt_option.enable_fp16 = option.trt_enable_fp16; - trt_option.enable_int8 = option.trt_enable_int8; - trt_option.max_batch_size = option.trt_max_batch_size; - trt_option.max_workspace_size = option.trt_max_workspace_size; - trt_option.max_shape = option.trt_max_shape; - trt_option.min_shape = option.trt_min_shape; - trt_option.opt_shape = option.trt_opt_shape; - trt_option.serialize_file = option.trt_serialize_file; - trt_option.enable_pinned_memory = option.enable_pinned_memory; - trt_option.external_stream_ = option.external_stream_; - - FDASSERT(option.model_format == ModelFormat::PADDLE || - option.model_format == ModelFormat::ONNX, - "TrtBackend only support model format of ModelFormat::PADDLE / " - "ModelFormat::ONNX."); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - if (option.model_format == ModelFormat::ONNX) { - FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option), - "Load model from ONNX failed while initliazing TrtBackend."); - } else { - FDASSERT(casted_backend->InitFromPaddle(option.model_file, - option.params_file, trt_option), - "Load model from Paddle failed while initliazing TrtBackend."); - } -#else - FDASSERT(false, "TrtBackend is not available, please compiled with " - "ENABLE_TRT_BACKEND=ON."); -#endif -} - -void Runtime::CreateLiteBackend() { -#ifdef ENABLE_LITE_BACKEND - auto lite_option = LiteBackendOption(); - lite_option.threads = option.cpu_thread_num; - lite_option.enable_int8 = option.lite_enable_int8; - 
lite_option.enable_fp16 = option.lite_enable_fp16; - lite_option.power_mode = static_cast(option.lite_power_mode); - lite_option.optimized_model_dir = option.lite_optimized_model_dir; - lite_option.nnadapter_subgraph_partition_config_path = option.lite_nnadapter_subgraph_partition_config_path; - lite_option.nnadapter_subgraph_partition_config_buffer = option.lite_nnadapter_subgraph_partition_config_buffer; - lite_option.nnadapter_device_names = option.lite_nnadapter_device_names; - lite_option.nnadapter_context_properties = option.lite_nnadapter_context_properties; - lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir; - lite_option.nnadapter_dynamic_shape_info = option.lite_nnadapter_dynamic_shape_info; - lite_option.nnadapter_mixed_precision_quantization_config_path = option.lite_nnadapter_mixed_precision_quantization_config_path; - lite_option.enable_timvx = option.enable_timvx; - lite_option.enable_ascend = option.enable_ascend; - lite_option.enable_kunlunxin = option.enable_kunlunxin; - lite_option.device_id = option.device_id; - lite_option.kunlunxin_l3_workspace_size = option.kunlunxin_l3_workspace_size; - lite_option.kunlunxin_locked = option.kunlunxin_locked; - lite_option.kunlunxin_autotune = option.kunlunxin_autotune; - lite_option.kunlunxin_autotune_file = option.kunlunxin_autotune_file; - lite_option.kunlunxin_precision = option.kunlunxin_precision; - lite_option.kunlunxin_adaptive_seqlen = option.kunlunxin_adaptive_seqlen; - lite_option.kunlunxin_enable_multi_stream = option.kunlunxin_enable_multi_stream; - - FDASSERT(option.model_format == ModelFormat::PADDLE, - "LiteBackend only support model format of ModelFormat::PADDLE"); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file, - lite_option), - "Load model from nb file failed while initializing LiteBackend."); -#else - FDASSERT(false, "LiteBackend is not available, please compiled with " - "ENABLE_LITE_BACKEND=ON."); -#endif -} - -void Runtime::CreateRKNPU2Backend() { -#ifdef ENABLE_RKNPU2_BACKEND - auto rknpu2_option = RKNPU2BackendOption(); - rknpu2_option.cpu_name = option.rknpu2_cpu_name_; - rknpu2_option.core_mask = option.rknpu2_core_mask_; - FDASSERT(option.model_format == ModelFormat::RKNN, - "RKNPU2Backend only support model format of ModelFormat::RKNN"); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option), - "Load model from nb file failed while initializing LiteBackend."); -#else - FDASSERT(false, "RKNPU2Backend is not available, please compiled with " - "ENABLE_RKNPU2_BACKEND=ON."); -#endif -} - -void Runtime::CreateSophgoNPUBackend() { -#ifdef ENABLE_SOPHGO_BACKEND - auto sophgo_option = SophgoBackendOption(); - FDASSERT(option.model_format == ModelFormat::SOPHGO, - "SophgoBackend only support model format of ModelFormat::SOPHGO"); - backend_ = utils::make_unique(); - auto casted_backend = dynamic_cast(backend_.get()); - FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option), - "Load model from nb file failed while initializing LiteBackend."); -#else - FDASSERT(false, "SophgoBackend is not available, please compiled with " - "ENABLE_SOPHGO_BACKEND=ON."); -#endif -} - -Runtime* Runtime::Clone(void* stream, int device_id) { - Runtime* runtime = new Runtime(); - if (option.backend != Backend::OPENVINO && - option.backend != Backend::PDINFER && 
option.backend != Backend::TRT) { - runtime->Init(option); - FDWARNING << "Only OpenVINO/Paddle Inference/TensorRT support \ - clone engine to reduce CPU/GPU memory usage now. For " - << option.backend - << ", FastDeploy will create a new engine which \ - will not share memory with the current runtime." - << std::endl; - return runtime; - } - FDINFO << "Runtime Clone with Backend:: " << Str(option.backend) << " in " - << Str(option.device) << "." << std::endl; - runtime->option = option; - runtime->backend_ = backend_->Clone(stream, device_id); - return runtime; -} - -} // namespace fastdeploy diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h index 46532b16b..f6c75fe8d 100755 --- a/fastdeploy/runtime.h +++ b/fastdeploy/runtime.h @@ -19,573 +19,5 @@ */ #pragma once - -#include -#include -#include - -#include "backends/rknpu/rknpu2/rknpu2_config.h" -#include "fastdeploy/backends/backend.h" -#include "fastdeploy/utils/perf.h" - -/** \brief All C++ FastDeploy APIs are defined inside this namespace -* -*/ -namespace fastdeploy { - -/*! Inference backend supported in FastDeploy */ -enum Backend { - UNKNOWN, ///< Unknown inference backend - ORT, ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU - TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only - PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU - POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU - OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only - LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only - RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only - SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only -}; - -FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, - const Backend& backend); - -/*! Paddle Lite power mode for mobile device. */ -enum LitePowerMode { - LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode - LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode - LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode - LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode - LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode - LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode -}; - -FASTDEPLOY_DECL std::string Str(const Backend& b); -FASTDEPLOY_DECL std::string Str(const ModelFormat& f); - -/** - * @brief Get all the available inference backend in FastDeploy - */ -FASTDEPLOY_DECL std::vector GetAvailableBackends(); - -/** - * @brief Check if the inference backend available - */ -FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend); - -bool CheckModelFormat(const std::string& model_file, - const ModelFormat& model_format); -ModelFormat GuessModelFormat(const std::string& model_file); - -/*! @brief Option object used when create a new Runtime object - */ -struct FASTDEPLOY_DECL RuntimeOption { - /** \brief Set path of model file and parameter file - * - * \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model - * \param[in] params_path Path of parameter file, this only used when the model format is Paddle, e.g Resnet50/model.pdiparams - * \param[in] format Format of the loaded model - */ - void SetModelPath(const std::string& model_path, - const std::string& params_path = "", - const ModelFormat& format = ModelFormat::PADDLE); - - /** \brief Specify the memory buffer of model and parameter. 
Used when model and params are loaded directly from memory - * - * \param[in] model_buffer The memory buffer of model - * \param[in] model_buffer_size The size of the model data - * \param[in] params_buffer The memory buffer of the combined parameters file - * \param[in] params_buffer_size The size of the combined parameters data - * \param[in] format Format of the loaded model - */ - void SetModelBuffer(const char * model_buffer, - size_t model_buffer_size, - const char * params_buffer, - size_t params_buffer_size, - const ModelFormat& format = ModelFormat::PADDLE); - - /// Use cpu to inference, the runtime will inference on CPU by default - void UseCpu(); - - /// Use Nvidia GPU to inference - void UseGpu(int gpu_id = 0); - - void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name = - fastdeploy::rknpu2::CpuName::RK3588, - fastdeploy::rknpu2::CoreMask rknpu2_core = - fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0); - - /// Use TimVX to inference - void UseTimVX(); - - /// Use Huawei Ascend to inference - void UseAscend(); - - /// - /// \brief Turn on KunlunXin XPU. - /// - /// \param kunlunxin_id the KunlunXin XPU card to use (default is 0). - /// \param l3_workspace_size The size of the video memory allocated by the l3 - /// cache, the maximum is 16M. - /// \param locked Whether the allocated L3 cache can be locked. If false, - /// it means that the L3 cache is not locked, and the allocated L3 - /// cache can be shared by multiple models, and multiple models - /// sharing the L3 cache will be executed sequentially on the card. - /// \param autotune Whether to autotune the conv operator in the model. If - /// true, when the conv operator of a certain dimension is executed - /// for the first time, it will automatically search for a better - /// algorithm to improve the performance of subsequent conv operators - /// of the same dimension. - /// \param autotune_file Specify the path of the autotune file. If - /// autotune_file is specified, the algorithm specified in the - /// file will be used and autotune will not be performed again. - /// \param precision Calculation accuracy of multi_encoder - /// \param adaptive_seqlen Is the input of multi_encoder variable length - /// \param enable_multi_stream Whether to enable the multi stream of - /// KunlunXin XPU. 
- /// - void UseKunlunXin(int kunlunxin_id = 0, - int l3_workspace_size = 0xfffc00, - bool locked = false, - bool autotune = true, - const std::string& autotune_file = "", - const std::string& precision = "int16", - bool adaptive_seqlen = false, - bool enable_multi_stream = false); - - /// Use Sophgo to inference - void UseSophgo(); - - void SetExternalStream(void* external_stream); - - /* - * @brief Set number of cpu threads while inference on CPU, by default it will decided by the different backends - */ - void SetCpuThreadNum(int thread_num); - - /// Set ORT graph opt level, default is decide by ONNX Runtime itself - void SetOrtGraphOptLevel(int level = -1); - - /// Set Paddle Inference as inference backend, support CPU/GPU - void UsePaddleBackend(); - - /// Wrapper function of UsePaddleBackend() - void UsePaddleInferBackend() { return UsePaddleBackend(); } - - /// Set ONNX Runtime as inference backend, support CPU/GPU - void UseOrtBackend(); - - /// Set SOPHGO Runtime as inference backend, support CPU/GPU - void UseSophgoBackend(); - - /// Set TensorRT as inference backend, only support GPU - void UseTrtBackend(); - - /// Set Poros backend as inference backend, support CPU/GPU - void UsePorosBackend(); - - /// Set OpenVINO as inference backend, only support CPU - void UseOpenVINOBackend(); - - /// Set Paddle Lite as inference backend, only support arm cpu - void UseLiteBackend(); - - /// Wrapper function of UseLiteBackend() - void UsePaddleLiteBackend() { return UseLiteBackend(); } - - /// Set mkldnn switch while using Paddle Inference as inference backend - void SetPaddleMKLDNN(bool pd_mkldnn = true); - - /* - * @brief If TensorRT backend is used, EnablePaddleToTrt will change to use Paddle Inference backend, and use its integrated TensorRT instead. - */ - void EnablePaddleToTrt(); - - /** - * @brief Delete pass by name while using Paddle Inference as inference backend, this can be called multiple times to delete a set of passes - */ - void DeletePaddleBackendPass(const std::string& delete_pass_name); - - /** - * @brief Enable print debug information while using Paddle Inference as inference backend, the backend disable the debug information by default - */ - void EnablePaddleLogInfo(); - - /** - * @brief Disable print debug information while using Paddle Inference as inference backend - */ - void DisablePaddleLogInfo(); - - /** - * @brief Set shape cache size while using Paddle Inference with mkldnn, by default it will cache all the difference shape - */ - void SetPaddleMKLDNNCacheSize(int size); - - /** - * @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'.... - */ - void SetOpenVINODevice(const std::string& name = "CPU"); - - /** - * @brief Set shape info for OpenVINO - */ - void SetOpenVINOShapeInfo( - const std::map>& shape_info) { - ov_shape_infos = shape_info; - } - - /** - * @brief While use OpenVINO backend with intel GPU, use this interface to specify operators run on CPU - */ - void SetOpenVINOCpuOperators(const std::vector& operators) { - ov_cpu_operators = operators; - } - - /** - * @brief Set optimzed model dir for Paddle Lite backend. - */ - void SetLiteOptimizedModelDir(const std::string& optimized_model_dir); - - /** - * @brief Set subgraph partition path for Paddle Lite backend. - */ - void SetLiteSubgraphPartitionPath( - const std::string& nnadapter_subgraph_partition_config_path); - - /** - * @brief Set subgraph partition path for Paddle Lite backend. 
- */ - void SetLiteSubgraphPartitionConfigBuffer( - const std::string& nnadapter_subgraph_partition_config_buffer); - - /** - * @brief Set device name for Paddle Lite backend. - */ - void SetLiteDeviceNames( - const std::vector& nnadapter_device_names); - - /** - * @brief Set context properties for Paddle Lite backend. - */ - void SetLiteContextProperties( - const std::string& nnadapter_context_properties); - - /** - * @brief Set model cache dir for Paddle Lite backend. - */ - void SetLiteModelCacheDir( - const std::string& nnadapter_model_cache_dir); - - /** - * @brief Set dynamic shape info for Paddle Lite backend. - */ - void SetLiteDynamicShapeInfo( - const std::map>>& - nnadapter_dynamic_shape_info); - - /** - * @brief Set mixed precision quantization config path for Paddle Lite backend. - */ - void SetLiteMixedPrecisionQuantizationConfigPath( - const std::string& nnadapter_mixed_precision_quantization_config_path); - - /** - * @brief enable half precision while use paddle lite backend - */ - void EnableLiteFP16(); - - /** - * @brief disable half precision, change to full precision(float32) - */ - void DisableLiteFP16(); - - /** - * @brief enable int8 precision while use paddle lite backend - */ - void EnableLiteInt8(); - - /** - * @brief disable int8 precision, change to full precision(float32) - */ - void DisableLiteInt8(); - - /** - * @brief Set power mode while using Paddle Lite as inference backend, mode(0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND, 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW, refer [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details) - */ - void SetLitePowerMode(LitePowerMode mode); - - /** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend - * - * \param[in] input_name The name of input for the model which is dynamic shape - * \param[in] min_shape The minimal shape for the input tensor - * \param[in] opt_shape The optimized shape for the input tensor, just set the most common shape, if set as default value, it will keep same with min_shape - * \param[in] max_shape The maximum shape for the input tensor, if set as default value, it will keep same with min_shape - */ - void SetTrtInputShape( - const std::string& input_name, const std::vector& min_shape, - const std::vector& opt_shape = std::vector(), - const std::vector& max_shape = std::vector()); - - /// Set max_workspace_size for TensorRT, default 1<<30 - void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size); - - /// Set max_batch_size for TensorRT, default 32 - void SetTrtMaxBatchSize(size_t max_batch_size); - - /** - * @brief Enable FP16 inference while using TensorRT backend. Notice: not all the GPU device support FP16, on those device doesn't support FP16, FastDeploy will fallback to FP32 automaticly - */ - void EnableTrtFP16(); - - /// Disable FP16 inference while using TensorRT backend - void DisableTrtFP16(); - - /** - * @brief Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again - */ - void SetTrtCacheFile(const std::string& cache_file_path); - - /** - * @brief Enable pinned memory. Pinned memory can be utilized to speedup the data transfer between CPU and GPU. Currently it's only suppurted in TRT backend and Paddle Inference backend. 
- */ - void EnablePinnedMemory(); - - /** - * @brief Disable pinned memory - */ - void DisablePinnedMemory(); - - /** - * @brief Enable to collect shape in paddle trt backend - */ - void EnablePaddleTrtCollectShape(); - - /** - * @brief Disable to collect shape in paddle trt backend - */ - void DisablePaddleTrtCollectShape(); - - /** - * @brief Prevent ops running in paddle trt backend - */ - void DisablePaddleTrtOPs(const std::vector& ops); - - /* - * @brief Set number of streams by the OpenVINO backends - */ - void SetOpenVINOStreams(int num_streams); - - /** \Use Graphcore IPU to inference. - * - * \param[in] device_num the number of IPUs. - * \param[in] micro_batch_size the batch size in the graph, only work when graph has no batch shape info. - * \param[in] enable_pipelining enable pipelining. - * \param[in] batches_per_step the number of batches per run in pipelining. - */ - void UseIpu(int device_num = 1, int micro_batch_size = 1, - bool enable_pipelining = false, int batches_per_step = 1); - - /** \brief Set IPU config. - * - * \param[in] enable_fp16 enable fp16. - * \param[in] replica_num the number of graph replication. - * \param[in] available_memory_proportion the available memory proportion for matmul/conv. - * \param[in] enable_half_partial enable fp16 partial for matmul, only work with fp16. - */ - void SetIpuConfig(bool enable_fp16 = false, int replica_num = 1, - float available_memory_proportion = 1.0, - bool enable_half_partial = false); - - Backend backend = Backend::UNKNOWN; - // for cpu inference and preprocess - // default will let the backend choose their own default value - int cpu_thread_num = -1; - int device_id = 0; - - Device device = Device::CPU; - - void* external_stream_ = nullptr; - - bool enable_pinned_memory = false; - - // ======Only for ORT Backend======== - // -1 means use default value by ort - // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3: - // ORT_ENABLE_ALL - int ort_graph_opt_level = -1; - int ort_inter_op_num_threads = -1; - // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL - int ort_execution_mode = -1; - - // ======Only for Paddle Backend===== - bool pd_enable_mkldnn = true; - bool pd_enable_log_info = false; - bool pd_enable_trt = false; - bool pd_collect_shape = false; - int pd_mkldnn_cache_size = 1; - std::vector pd_delete_pass_names; - - // ======Only for Paddle IPU Backend ======= - int ipu_device_num = 1; - int ipu_micro_batch_size = 1; - bool ipu_enable_pipelining = false; - int ipu_batches_per_step = 1; - bool ipu_enable_fp16 = false; - int ipu_replica_num = 1; - float ipu_available_memory_proportion = 1.0; - bool ipu_enable_half_partial = false; - - // ======Only for Paddle Lite Backend===== - // 0: LITE_POWER_HIGH 1: LITE_POWER_LOW 2: LITE_POWER_FULL - // 3: LITE_POWER_NO_BIND 4: LITE_POWER_RAND_HIGH - // 5: LITE_POWER_RAND_LOW - LitePowerMode lite_power_mode = LitePowerMode::LITE_POWER_NO_BIND; - // enable int8 or not - bool lite_enable_int8 = false; - // enable fp16 or not - bool lite_enable_fp16 = false; - // optimized model dir for CxxConfig - std::string lite_optimized_model_dir = ""; - std::string lite_nnadapter_subgraph_partition_config_path = ""; - // and other nnadapter settings for CxxConfig - std::string lite_nnadapter_subgraph_partition_config_buffer = ""; - std::string lite_nnadapter_context_properties = ""; - std::string lite_nnadapter_model_cache_dir = ""; - std::string lite_nnadapter_mixed_precision_quantization_config_path = ""; - std::map>> - lite_nnadapter_dynamic_shape_info = {{"", {{0}}}}; - std::vector 
lite_nnadapter_device_names = {}; - - bool enable_timvx = false; - bool enable_ascend = false; - bool enable_kunlunxin = false; - - // ======Only for Trt Backend======= - std::map> trt_max_shape; - std::map> trt_min_shape; - std::map> trt_opt_shape; - std::string trt_serialize_file = ""; - bool trt_enable_fp16 = false; - bool trt_enable_int8 = false; - size_t trt_max_batch_size = 1; - size_t trt_max_workspace_size = 1 << 30; - // ======Only for PaddleTrt Backend======= - std::vector trt_disabled_ops_{}; - - // ======Only for Poros Backend======= - bool is_dynamic = false; - bool long_to_int = true; - bool use_nvidia_tf32 = false; - int unconst_ops_thres = -1; - std::string poros_file = ""; - - // ======Only for OpenVINO Backend======= - int ov_num_streams = 0; - std::string openvino_device = "CPU"; - std::map> ov_shape_infos; - std::vector ov_cpu_operators; - - // ======Only for RKNPU2 Backend======= - fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ = - fastdeploy::rknpu2::CpuName::RK3588; - fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ = - fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO; - - // ======Only for KunlunXin XPU Backend======= - int kunlunxin_l3_workspace_size = 0xfffc00; - bool kunlunxin_locked = false; - bool kunlunxin_autotune = true; - std::string kunlunxin_autotune_file = ""; - std::string kunlunxin_precision = "int16"; - bool kunlunxin_adaptive_seqlen = false; - bool kunlunxin_enable_multi_stream = false; - - std::string model_file = ""; // Path of model file - std::string params_file = ""; // Path of parameters file, can be empty - // format of input model - ModelFormat model_format = ModelFormat::AUTOREC; - - std::string model_buffer_ = ""; - std::string params_buffer_ = ""; - size_t model_buffer_size_ = 0; - size_t params_buffer_size_ = 0; - bool model_from_memory_ = false; -}; - -/*! @brief Runtime object used to inference the loaded model on different devices - */ -struct FASTDEPLOY_DECL Runtime { - public: - /// Intialize a Runtime object with RuntimeOption - bool Init(const RuntimeOption& _option); - - /** \brief Inference the model by the input data, and write to the output - * - * \param[in] input_tensors Notice the FDTensor::name should keep same with the model's input - * \param[in] output_tensors Inference results - * \return true if the inference successed, otherwise false - */ - bool Infer(std::vector& input_tensors, - std::vector* output_tensors); - - /** \brief No params inference the model. - * - * the input and output data need to pass through the BindInputTensor and GetOutputTensor interfaces. 
- */ - bool Infer(); - - /** \brief Compile TorchScript Module, only for Poros backend - * - * \param[in] prewarm_tensors Prewarm datas for compile - * \param[in] _option Runtime option - * \return true if compile successed, otherwise false - */ - bool Compile(std::vector>& prewarm_tensors, - const RuntimeOption& _option); - - /** \brief Get number of inputs - */ - int NumInputs() { return backend_->NumInputs(); } - /** \brief Get number of outputs - */ - int NumOutputs() { return backend_->NumOutputs(); } - /** \brief Get input information by index - */ - TensorInfo GetInputInfo(int index); - /** \brief Get output information by index - */ - TensorInfo GetOutputInfo(int index); - /** \brief Get all the input information - */ - std::vector GetInputInfos(); - /** \brief Get all the output information - */ - std::vector GetOutputInfos(); - /** \brief Bind FDTensor by name, no copy and share input memory - */ - void BindInputTensor(const std::string& name, FDTensor& input); - /** \brief Get output FDTensor by name, no copy and share backend output memory - */ - FDTensor* GetOutputTensor(const std::string& name); - - /** \brief Clone new Runtime when multiple instances of the same model are created - * - * \param[in] stream CUDA Stream, defualt param is nullptr - * \return new Runtime* by this clone - */ - Runtime* Clone(void* stream = nullptr, int device_id = -1); - - RuntimeOption option; - - private: - void CreateOrtBackend(); - void CreatePaddleBackend(); - void CreateTrtBackend(); - void CreateOpenVINOBackend(); - void CreateLiteBackend(); - void CreateRKNPU2Backend(); - void CreateSophgoNPUBackend(); - std::unique_ptr backend_; - std::vector input_tensors_; - std::vector output_tensors_; -}; -} // namespace fastdeploy +#include "fastdeploy/core/config.h" +#include "fastdeploy/runtime/runtime.h" diff --git a/fastdeploy/runtime/enum_variables.cc b/fastdeploy/runtime/enum_variables.cc new file mode 100644 index 000000000..ed7b87ba6 --- /dev/null +++ b/fastdeploy/runtime/enum_variables.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
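// ---------------------------------------------------------------------------
// [Editor's note: usage sketch, not part of this patch.] The Runtime API
// removed above (and re-introduced under fastdeploy/runtime/ below) is driven
// as in this minimal example. The model path and the input name "x" are
// placeholders; FDDataType::FP32 and a CPU buffer are assumed to match the
// model's first input.
#include "fastdeploy/runtime.h"
#include <vector>

int main() {
  fastdeploy::RuntimeOption opt;
  opt.SetModelPath("model.pdmodel", "model.pdiparams");  // Paddle format
  opt.UseCpu();
  opt.UseOrtBackend();

  fastdeploy::Runtime runtime;
  if (!runtime.Init(opt)) return -1;

  std::vector<float> data(1 * 3 * 224 * 224, 0.0f);
  std::vector<fastdeploy::FDTensor> inputs(1), outputs;
  inputs[0].name = "x";  // must match the model's input name
  inputs[0].SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
                            data.data(), fastdeploy::Device::CPU);
  return runtime.Infer(inputs, &outputs) ? 0 : -1;
}
// ---------------------------------------------------------------------------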
+ +#include "fastdeploy/runtime/enum_variables.h" + +namespace fastdeploy { +std::ostream& operator<<(std::ostream& out, const Backend& backend) { + if (backend == Backend::ORT) { + out << "Backend::ORT"; + } else if (backend == Backend::TRT) { + out << "Backend::TRT"; + } else if (backend == Backend::PDINFER) { + out << "Backend::PDINFER"; + } else if (backend == Backend::OPENVINO) { + out << "Backend::OPENVINO"; + } else if (backend == Backend::RKNPU2) { + out << "Backend::RKNPU2"; + } else if (backend == Backend::SOPHGOTPU) { + out << "Backend::SOPHGOTPU"; + } else if (backend == Backend::POROS) { + out << "Backend::POROS"; + } else if (backend == Backend::LITE) { + out << "Backend::PDLITE"; + } else { + out << "UNKNOWN-Backend"; + } + return out; +} + +std::ostream& operator<<(std::ostream& out, const Device& d) { + switch (d) { + case Device::CPU: + out << "Device::CPU"; + break; + case Device::GPU: + out << "Device::GPU"; + break; + case Device::RKNPU: + out << "Device::RKNPU"; + break; + case Device::SOPHGOTPUD: + out << "Device::SOPHGOTPUD"; + break; + case Device::TIMVX: + out << "Device::TIMVX"; + break; + case Device::KUNLUNXIN: + out << "Device::KUNLUNXIN"; + break; + case Device::ASCEND: + out << "Device::ASCEND"; + break; + default: + out << "Device::UNKOWN"; + } + return out; +} + +std::ostream& operator<<(std::ostream& out, const ModelFormat& format) { + if (format == ModelFormat::PADDLE) { + out << "ModelFormat::PADDLE"; + } else if (format == ModelFormat::ONNX) { + out << "ModelFormat::ONNX"; + } else if (format == ModelFormat::RKNN) { + out << "ModelFormat::RKNN"; + } else if (format == ModelFormat::SOPHGO) { + out << "ModelFormat::SOPHGO"; + } else if (format == ModelFormat::TORCHSCRIPT) { + out << "ModelFormat::TORCHSCRIPT"; + } + out << "UNKNOWN-ModelFormat"; + return out; +} +} // namespace fastdeploy diff --git a/fastdeploy/runtime/enum_variables.h b/fastdeploy/runtime/enum_variables.h new file mode 100644 index 000000000..bfcdd7eef --- /dev/null +++ b/fastdeploy/runtime/enum_variables.h @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file enum_variables.h + \brief A brief file description. + + More details + */ + +#pragma once +#include "fastdeploy/utils/utils.h" +#include +#include + +namespace fastdeploy { + +/*! 
Inference backend supported in FastDeploy */ +enum Backend { + UNKNOWN, ///< Unknown inference backend + ORT, ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU + TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only + PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU + POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU + OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only + LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only + RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only + SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only +}; + +enum FASTDEPLOY_DECL Device { + CPU, + GPU, + RKNPU, + IPU, + TIMVX, + KUNLUNXIN, + ASCEND, + SOPHGOTPUD +}; + +/*! Deep learning model format */ +enum ModelFormat { + AUTOREC, ///< Auto recognize the model format by model file name + PADDLE, ///< Model with paddlepaddle format + ONNX, ///< Model with ONNX format + RKNN, ///< Model with RKNN format + TORCHSCRIPT, ///< Model with TorchScript format + SOPHGO, ///< Model with SOPHGO format +}; + +/// Describle all the supported backends for specified model format +static std::map> s_default_backends_cfg = { + {ModelFormat::PADDLE, {Backend::PDINFER, Backend::LITE, + Backend::ORT, Backend::OPENVINO, Backend::TRT}}, + {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}}, + {ModelFormat::RKNN, {Backend::RKNPU2}}, + {ModelFormat::TORCHSCRIPT, {Backend::POROS}}, + {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}} +}; + +FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Backend& b); + +FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d); + +FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, + const ModelFormat& f); + +} // namespace fastdeploy diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc new file mode 100644 index 000000000..bb825c8b9 --- /dev/null +++ b/fastdeploy/runtime/runtime.cc @@ -0,0 +1,492 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
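// ---------------------------------------------------------------------------
// [Editor's note: usage sketch, not part of this patch.] The enums above plus
// GetAvailableBackends()/IsBackendAvailable() (defined in runtime_option.cc
// later in this patch) let callers probe what the current build supports
// before picking a backend; the stream operators come from enum_variables.cc.
#include "fastdeploy/runtime.h"
#include <iostream>

void PrintBackends() {
  for (const auto& b : fastdeploy::GetAvailableBackends()) {
    std::cout << b << std::endl;                     // e.g. "Backend::ORT"
  }
  if (fastdeploy::IsBackendAvailable(fastdeploy::Backend::TRT)) {
    // Safe to call RuntimeOption::UseTrtBackend() with this build.
  }
}
// ---------------------------------------------------------------------------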
+ +#include "fastdeploy/runtime/runtime.h" + +#include "fastdeploy/utils/unique_ptr.h" +#include "fastdeploy/utils/utils.h" + +#ifdef ENABLE_ORT_BACKEND +#include "fastdeploy/backends/ort/ort_backend.h" +#endif + +#ifdef ENABLE_TRT_BACKEND +#include "fastdeploy/backends/tensorrt/trt_backend.h" +#endif + +#ifdef ENABLE_PADDLE_BACKEND +#include "fastdeploy/backends/paddle/paddle_backend.h" +#endif + +#ifdef ENABLE_POROS_BACKEND +#include "fastdeploy/backends/poros/poros_backend.h" +#endif + +#ifdef ENABLE_OPENVINO_BACKEND +#include "fastdeploy/backends/openvino/ov_backend.h" +#endif + +#ifdef ENABLE_LITE_BACKEND +#include "fastdeploy/backends/lite/lite_backend.h" +#endif + +#ifdef ENABLE_RKNPU2_BACKEND +#include "fastdeploy/backends/rknpu2/rknpu2_backend.h" +#endif + +#ifdef ENABLE_SOPHGO_BACKEND +#include "fastdeploy/backends/sophgo/sophgo_backend.h" +#endif + +namespace fastdeploy { + +bool Runtime::Init(const RuntimeOption& _option) { + option = _option; + // Choose default backend by model format + if (option.backend == Backend::UNKNOWN) { + auto iter = s_default_backends_cfg.find(option.model_format); + if (iter == s_default_backends_cfg.end()) { + FDERROR << "Cannot found a default backend for model format: " + << option.model_format + << ", please define the inference backend in RuntimeOption." + << std::endl; + return false; + } + for (const auto& b : iter->second) { + if (IsBackendAvailable(b)) { + option.backend = b; + FDINFO << "FastDeploy will choose " << b << " to inference this model." + << std::endl; + } + } + if (option.backend == Backend::UNKNOWN) { + FDERROR << "Cannot found available backends for model format: " + << option.model_format << "." << std::endl; + return false; + } + } + + if (option.backend == Backend::ORT) { + FDASSERT(option.device == Device::CPU || option.device == Device::GPU, + "Backend::ORT only supports Device::CPU/Device::GPU."); + CreateOrtBackend(); + FDINFO << "Runtime initialized with Backend::ORT in " << option.device + << "." << std::endl; + } else if (option.backend == Backend::TRT) { + FDASSERT(option.device == Device::GPU, + "Backend::TRT only supports Device::GPU."); + CreateTrtBackend(); + FDINFO << "Runtime initialized with Backend::TRT in " << option.device + << "." << std::endl; + } else if (option.backend == Backend::PDINFER) { + FDASSERT( + option.device == Device::CPU || option.device == Device::GPU || + option.device == Device::IPU, + "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU."); + FDASSERT( + option.model_format == ModelFormat::PADDLE, + "Backend::PDINFER only supports model format of ModelFormat::PADDLE."); + CreatePaddleBackend(); + FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device + << "." << std::endl; + } else if (option.backend == Backend::POROS) { + FDASSERT(option.device == Device::CPU || option.device == Device::GPU, + "Backend::POROS only supports Device::CPU/Device::GPU."); + FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT, + "Backend::POROS only supports model format of " + "ModelFormat::TORCHSCRIPT."); + FDINFO << "Runtime initialized with Backend::POROS in " << option.device + << "." << std::endl; + return true; + } else if (option.backend == Backend::OPENVINO) { + FDASSERT(option.device == Device::CPU, + "Backend::OPENVINO only supports Device::CPU"); + CreateOpenVINOBackend(); + FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device + << "." 
<< std::endl; + } else if (option.backend == Backend::LITE) { + FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX || + option.device == Device::KUNLUNXIN || + option.device == Device::ASCEND, + "Backend::LITE only supports " + "Device::CPU/Device::TIMVX/Device::KUNLUNXIN."); + CreateLiteBackend(); + FDINFO << "Runtime initialized with Backend::LITE in " << option.device + << "." << std::endl; + } else if (option.backend == Backend::RKNPU2) { + FDASSERT(option.device == Device::RKNPU, + "Backend::RKNPU2 only supports Device::RKNPU2"); + CreateRKNPU2Backend(); + + FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device + << "." << std::endl; + } else if (option.backend == Backend::SOPHGOTPU) { + FDASSERT(option.device == Device::SOPHGOTPUD, + "Backend::SOPHGO only supports Device::SOPHGO"); + CreateSophgoNPUBackend(); + + FDINFO << "Runtime initialized with Backend::SOPHGO in " << option.device + << "." << std::endl; + } else { + FDERROR << "Runtime only support " + "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as " + "backend now." + << std::endl; + return false; + } + return true; +} + +TensorInfo Runtime::GetInputInfo(int index) { + return backend_->GetInputInfo(index); +} + +TensorInfo Runtime::GetOutputInfo(int index) { + return backend_->GetOutputInfo(index); +} + +std::vector Runtime::GetInputInfos() { + return backend_->GetInputInfos(); +} + +std::vector Runtime::GetOutputInfos() { + return backend_->GetOutputInfos(); +} + +bool Runtime::Infer(std::vector& input_tensors, + std::vector* output_tensors) { + for (auto& tensor : input_tensors) { + FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id, + "Device id of input tensor(%d) and runtime(%d) are not same.", + tensor.device_id, option.device_id); + } + return backend_->Infer(input_tensors, output_tensors); +} + +bool Runtime::Infer() { + bool result = backend_->Infer(input_tensors_, &output_tensors_, false); + for (auto& tensor : output_tensors_) { + tensor.device_id = option.device_id; + } + return result; +} + +void Runtime::BindInputTensor(const std::string& name, FDTensor& input) { + bool is_exist = false; + for (auto& t : input_tensors_) { + if (t.name == name) { + is_exist = true; + t.SetExternalData(input.shape, input.dtype, input.MutableData(), + input.device, input.device_id); + break; + } + } + if (!is_exist) { + FDTensor new_tensor(name); + new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(), + input.device, input.device_id); + input_tensors_.emplace_back(std::move(new_tensor)); + } +} + +FDTensor* Runtime::GetOutputTensor(const std::string& name) { + for (auto& t : output_tensors_) { + if (t.name == name) { + return &t; + } + } + FDWARNING << "The output name [" << name << "] don't exist." << std::endl; + return nullptr; +} + +void Runtime::CreatePaddleBackend() { +#ifdef ENABLE_PADDLE_BACKEND + auto pd_option = PaddleBackendOption(); + pd_option.model_file = option.model_file; + pd_option.params_file = option.params_file; + pd_option.enable_mkldnn = option.pd_enable_mkldnn; + pd_option.enable_log_info = option.pd_enable_log_info; + pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size; + pd_option.use_gpu = (option.device == Device::GPU) ? true : false; + pd_option.use_ipu = (option.device == Device::IPU) ? 
true : false; + pd_option.gpu_id = option.device_id; + pd_option.delete_pass_names = option.pd_delete_pass_names; + pd_option.cpu_thread_num = option.cpu_thread_num; + pd_option.enable_pinned_memory = option.enable_pinned_memory; + pd_option.external_stream_ = option.external_stream_; + pd_option.model_from_memory_ = option.model_from_memory_; + if (pd_option.model_from_memory_) { + pd_option.model_buffer_ = option.model_buffer_; + pd_option.params_buffer_ = option.params_buffer_; + pd_option.model_buffer_size_ = option.model_buffer_size_; + pd_option.params_buffer_size_ = option.params_buffer_size_; + } +#ifdef ENABLE_TRT_BACKEND + if (pd_option.use_gpu && option.pd_enable_trt) { + pd_option.enable_trt = true; + pd_option.collect_shape = option.pd_collect_shape; + auto trt_option = TrtBackendOption(); + trt_option.gpu_id = option.device_id; + trt_option.enable_fp16 = option.trt_enable_fp16; + trt_option.max_batch_size = option.trt_max_batch_size; + trt_option.max_workspace_size = option.trt_max_workspace_size; + trt_option.max_shape = option.trt_max_shape; + trt_option.min_shape = option.trt_min_shape; + trt_option.opt_shape = option.trt_opt_shape; + trt_option.serialize_file = option.trt_serialize_file; + trt_option.enable_pinned_memory = option.enable_pinned_memory; + pd_option.trt_option = trt_option; + pd_option.trt_disabled_ops_ = option.trt_disabled_ops_; + } +#endif +#ifdef WITH_IPU + if (pd_option.use_ipu) { + auto ipu_option = IpuOption(); + ipu_option.ipu_device_num = option.ipu_device_num; + ipu_option.ipu_micro_batch_size = option.ipu_micro_batch_size; + ipu_option.ipu_enable_pipelining = option.ipu_enable_pipelining; + ipu_option.ipu_batches_per_step = option.ipu_batches_per_step; + ipu_option.ipu_enable_fp16 = option.ipu_enable_fp16; + ipu_option.ipu_replica_num = option.ipu_replica_num; + ipu_option.ipu_available_memory_proportion = + option.ipu_available_memory_proportion; + ipu_option.ipu_enable_half_partial = option.ipu_enable_half_partial; + pd_option.ipu_option = ipu_option; + } +#endif + FDASSERT(option.model_format == ModelFormat::PADDLE, + "PaddleBackend only support model format of ModelFormat::PADDLE."); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + if (pd_option.model_from_memory_) { + FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_, + option.params_buffer_, pd_option), + "Load model from Paddle failed while initliazing PaddleBackend."); + } else { + FDASSERT(casted_backend->InitFromPaddle(option.model_file, + option.params_file, pd_option), + "Load model from Paddle failed while initliazing PaddleBackend."); + } +#else + FDASSERT(false, + "PaddleBackend is not available, please compiled with " + "ENABLE_PADDLE_BACKEND=ON."); +#endif +} + +void Runtime::CreateOpenVINOBackend() { +#ifdef ENABLE_OPENVINO_BACKEND + auto ov_option = OpenVINOBackendOption(); + ov_option.cpu_thread_num = option.cpu_thread_num; + ov_option.device = option.openvino_device; + ov_option.shape_infos = option.ov_shape_infos; + ov_option.num_streams = option.ov_num_streams; + for (const auto& op : option.ov_cpu_operators) { + ov_option.cpu_operators.insert(op); + } + FDASSERT(option.model_format == ModelFormat::PADDLE || + option.model_format == ModelFormat::ONNX, + "OpenVINOBackend only support model format of ModelFormat::PADDLE / " + "ModelFormat::ONNX."); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + + if (option.model_format == ModelFormat::ONNX) { + 
FDASSERT(casted_backend->InitFromOnnx(option.model_file, ov_option), + "Load model from ONNX failed while initliazing OrtBackend."); + } else { + FDASSERT(casted_backend->InitFromPaddle(option.model_file, + option.params_file, ov_option), + "Load model from Paddle failed while initliazing OrtBackend."); + } +#else + FDASSERT(false, + "OpenVINOBackend is not available, please compiled with " + "ENABLE_OPENVINO_BACKEND=ON."); +#endif +} + +void Runtime::CreateOrtBackend() { +#ifdef ENABLE_ORT_BACKEND + auto ort_option = OrtBackendOption(); + ort_option.graph_optimization_level = option.ort_graph_opt_level; + ort_option.intra_op_num_threads = option.cpu_thread_num; + ort_option.inter_op_num_threads = option.ort_inter_op_num_threads; + ort_option.execution_mode = option.ort_execution_mode; + ort_option.use_gpu = (option.device == Device::GPU) ? true : false; + ort_option.gpu_id = option.device_id; + ort_option.external_stream_ = option.external_stream_; + + FDASSERT(option.model_format == ModelFormat::PADDLE || + option.model_format == ModelFormat::ONNX, + "OrtBackend only support model format of ModelFormat::PADDLE / " + "ModelFormat::ONNX."); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + if (option.model_format == ModelFormat::ONNX) { + FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option), + "Load model from ONNX failed while initliazing OrtBackend."); + } else { + FDASSERT(casted_backend->InitFromPaddle(option.model_file, + option.params_file, ort_option), + "Load model from Paddle failed while initliazing OrtBackend."); + } +#else + FDASSERT(false, + "OrtBackend is not available, please compiled with " + "ENABLE_ORT_BACKEND=ON."); +#endif +} + +void Runtime::CreateTrtBackend() { +#ifdef ENABLE_TRT_BACKEND + auto trt_option = TrtBackendOption(); + trt_option.model_file = option.model_file; + trt_option.params_file = option.params_file; + trt_option.model_format = option.model_format; + trt_option.gpu_id = option.device_id; + trt_option.enable_fp16 = option.trt_enable_fp16; + trt_option.enable_int8 = option.trt_enable_int8; + trt_option.max_batch_size = option.trt_max_batch_size; + trt_option.max_workspace_size = option.trt_max_workspace_size; + trt_option.max_shape = option.trt_max_shape; + trt_option.min_shape = option.trt_min_shape; + trt_option.opt_shape = option.trt_opt_shape; + trt_option.serialize_file = option.trt_serialize_file; + trt_option.enable_pinned_memory = option.enable_pinned_memory; + trt_option.external_stream_ = option.external_stream_; + + FDASSERT(option.model_format == ModelFormat::PADDLE || + option.model_format == ModelFormat::ONNX, + "TrtBackend only support model format of ModelFormat::PADDLE / " + "ModelFormat::ONNX."); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + if (option.model_format == ModelFormat::ONNX) { + FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option), + "Load model from ONNX failed while initliazing TrtBackend."); + } else { + FDASSERT(casted_backend->InitFromPaddle(option.model_file, + option.params_file, trt_option), + "Load model from Paddle failed while initliazing TrtBackend."); + } +#else + FDASSERT(false, + "TrtBackend is not available, please compiled with " + "ENABLE_TRT_BACKEND=ON."); +#endif +} + +void Runtime::CreateLiteBackend() { +#ifdef ENABLE_LITE_BACKEND + auto lite_option = LiteBackendOption(); + lite_option.threads = option.cpu_thread_num; + lite_option.enable_int8 = option.lite_enable_int8; + 
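// ---------------------------------------------------------------------------
// [Editor's note: usage sketch, not part of this patch.] The CPU-side fields
// consumed by CreateOpenVINOBackend()/CreateOrtBackend() above are set through
// RuntimeOption; the values below are illustrative only.
fastdeploy::RuntimeOption opt;
opt.UseCpu();
opt.SetCpuThreadNum(8);
opt.UseOpenVINOBackend();
opt.SetOpenVINODevice("CPU");    // "AUTO" / "GPU" are also accepted by OpenVINO
opt.SetOpenVINOStreams(4);
// Or, with ONNX Runtime instead of OpenVINO:
// opt.UseOrtBackend();
// opt.SetOrtGraphOptLevel(2);   // ORT_ENABLE_EXTENDED
// ---------------------------------------------------------------------------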
lite_option.enable_fp16 = option.lite_enable_fp16; + lite_option.power_mode = static_cast(option.lite_power_mode); + lite_option.optimized_model_dir = option.lite_optimized_model_dir; + lite_option.nnadapter_subgraph_partition_config_path = + option.lite_nnadapter_subgraph_partition_config_path; + lite_option.nnadapter_subgraph_partition_config_buffer = + option.lite_nnadapter_subgraph_partition_config_buffer; + lite_option.nnadapter_device_names = option.lite_nnadapter_device_names; + lite_option.nnadapter_context_properties = + option.lite_nnadapter_context_properties; + lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir; + lite_option.nnadapter_dynamic_shape_info = + option.lite_nnadapter_dynamic_shape_info; + lite_option.nnadapter_mixed_precision_quantization_config_path = + option.lite_nnadapter_mixed_precision_quantization_config_path; + lite_option.enable_timvx = option.enable_timvx; + lite_option.enable_ascend = option.enable_ascend; + lite_option.enable_kunlunxin = option.enable_kunlunxin; + lite_option.device_id = option.device_id; + lite_option.kunlunxin_l3_workspace_size = option.kunlunxin_l3_workspace_size; + lite_option.kunlunxin_locked = option.kunlunxin_locked; + lite_option.kunlunxin_autotune = option.kunlunxin_autotune; + lite_option.kunlunxin_autotune_file = option.kunlunxin_autotune_file; + lite_option.kunlunxin_precision = option.kunlunxin_precision; + lite_option.kunlunxin_adaptive_seqlen = option.kunlunxin_adaptive_seqlen; + lite_option.kunlunxin_enable_multi_stream = + option.kunlunxin_enable_multi_stream; + + FDASSERT(option.model_format == ModelFormat::PADDLE, + "LiteBackend only support model format of ModelFormat::PADDLE"); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file, + lite_option), + "Load model from nb file failed while initializing LiteBackend."); +#else + FDASSERT(false, + "LiteBackend is not available, please compiled with " + "ENABLE_LITE_BACKEND=ON."); +#endif +} + +void Runtime::CreateRKNPU2Backend() { +#ifdef ENABLE_RKNPU2_BACKEND + auto rknpu2_option = RKNPU2BackendOption(); + rknpu2_option.cpu_name = option.rknpu2_cpu_name_; + rknpu2_option.core_mask = option.rknpu2_core_mask_; + FDASSERT(option.model_format == ModelFormat::RKNN, + "RKNPU2Backend only support model format of ModelFormat::RKNN"); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option), + "Load model from nb file failed while initializing LiteBackend."); +#else + FDASSERT(false, + "RKNPU2Backend is not available, please compiled with " + "ENABLE_RKNPU2_BACKEND=ON."); +#endif +} + +void Runtime::CreateSophgoNPUBackend() { +#ifdef ENABLE_SOPHGO_BACKEND + auto sophgo_option = SophgoBackendOption(); + FDASSERT(option.model_format == ModelFormat::SOPHGO, + "SophgoBackend only support model format of ModelFormat::SOPHGO"); + backend_ = utils::make_unique(); + auto casted_backend = dynamic_cast(backend_.get()); + FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option), + "Load model from nb file failed while initializing LiteBackend."); +#else + FDASSERT(false, + "SophgoBackend is not available, please compiled with " + "ENABLE_SOPHGO_BACKEND=ON."); +#endif +} + +Runtime* Runtime::Clone(void* stream, int device_id) { + Runtime* runtime = new Runtime(); + if (option.backend != Backend::OPENVINO && + option.backend != 
Backend::PDINFER && option.backend != Backend::TRT) { + runtime->Init(option); + FDWARNING << "Only OpenVINO/Paddle Inference/TensorRT support \ + clone engine to reduce CPU/GPU memory usage now. For " + << option.backend + << ", FastDeploy will create a new engine which \ + will not share memory with the current runtime." + << std::endl; + return runtime; + } + FDINFO << "Runtime Clone with Backend:: " << option.backend << " in " + << option.device << "." << std::endl; + runtime->option = option; + runtime->backend_ = backend_->Clone(stream, device_id); + return runtime; +} + +} // namespace fastdeploy diff --git a/fastdeploy/runtime/runtime.h b/fastdeploy/runtime/runtime.h new file mode 100755 index 000000000..36a661463 --- /dev/null +++ b/fastdeploy/runtime/runtime.h @@ -0,0 +1,109 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*! \file runtime.h + \brief A brief file description. + + More details + */ + +#pragma once +#include "fastdeploy/backends/backend.h" +#include "fastdeploy/core/fd_tensor.h" +#include "fastdeploy/runtime/runtime_option.h" +#include "fastdeploy/utils/perf.h" + +/** \brief All C++ FastDeploy APIs are defined inside this namespace +* +*/ +namespace fastdeploy { + +/*! @brief Runtime object used to inference the loaded model on different devices + */ +struct FASTDEPLOY_DECL Runtime { + public: + /// Intialize a Runtime object with RuntimeOption + bool Init(const RuntimeOption& _option); + + /** \brief Inference the model by the input data, and write to the output + * + * \param[in] input_tensors Notice the FDTensor::name should keep same with the model's input + * \param[in] output_tensors Inference results + * \return true if the inference successed, otherwise false + */ + bool Infer(std::vector& input_tensors, + std::vector* output_tensors); + + /** \brief No params inference the model. + * + * the input and output data need to pass through the BindInputTensor and GetOutputTensor interfaces. 
+ */ + bool Infer(); + + /** \brief Compile TorchScript Module, only for Poros backend + * + * \param[in] prewarm_tensors Prewarm datas for compile + * \param[in] _option Runtime option + * \return true if compile successed, otherwise false + */ + bool Compile(std::vector>& prewarm_tensors, + const RuntimeOption& _option); + + /** \brief Get number of inputs + */ + int NumInputs() { return backend_->NumInputs(); } + /** \brief Get number of outputs + */ + int NumOutputs() { return backend_->NumOutputs(); } + /** \brief Get input information by index + */ + TensorInfo GetInputInfo(int index); + /** \brief Get output information by index + */ + TensorInfo GetOutputInfo(int index); + /** \brief Get all the input information + */ + std::vector GetInputInfos(); + /** \brief Get all the output information + */ + std::vector GetOutputInfos(); + /** \brief Bind FDTensor by name, no copy and share input memory + */ + void BindInputTensor(const std::string& name, FDTensor& input); + /** \brief Get output FDTensor by name, no copy and share backend output memory + */ + FDTensor* GetOutputTensor(const std::string& name); + + /** \brief Clone new Runtime when multiple instances of the same model are created + * + * \param[in] stream CUDA Stream, defualt param is nullptr + * \return new Runtime* by this clone + */ + Runtime* Clone(void* stream = nullptr, int device_id = -1); + + RuntimeOption option; + + private: + void CreateOrtBackend(); + void CreatePaddleBackend(); + void CreateTrtBackend(); + void CreateOpenVINOBackend(); + void CreateLiteBackend(); + void CreateRKNPU2Backend(); + void CreateSophgoNPUBackend(); + std::unique_ptr backend_; + std::vector input_tensors_; + std::vector output_tensors_; +}; +} // namespace fastdeploy diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc new file mode 100644 index 000000000..8e2ab6af8 --- /dev/null +++ b/fastdeploy/runtime/runtime_option.cc @@ -0,0 +1,515 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
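// ---------------------------------------------------------------------------
// [Editor's note: usage sketch, not part of this patch.] The zero-copy path
// documented above (BindInputTensor / Infer() / GetOutputTensor) looks like
// this for an already-initialized `runtime`; the tensor names "x"/"y" are
// placeholders for the model's real input/output names.
std::vector<float> buffer(1 * 3 * 224 * 224, 0.0f);   // user-owned memory
fastdeploy::FDTensor input("x");
input.SetExternalData({1, 3, 224, 224}, fastdeploy::FDDataType::FP32,
                      buffer.data(), fastdeploy::Device::CPU);
runtime.BindInputTensor("x", input);    // shares the buffer, no copy
runtime.Infer();                        // no-parameter overload
fastdeploy::FDTensor* y = runtime.GetOutputTensor("y");  // backend-owned memory
// Clone() shares the engine (OPENVINO/PDINFER/TRT only) to cut memory usage:
fastdeploy::Runtime* runtime2 = runtime.Clone();
// ---------------------------------------------------------------------------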
+ +#include "fastdeploy/runtime/runtime.h" +#include "fastdeploy/utils/unique_ptr.h" +#include "fastdeploy/utils/utils.h" + +namespace fastdeploy { + +std::vector GetAvailableBackends() { + std::vector backends; +#ifdef ENABLE_ORT_BACKEND + backends.push_back(Backend::ORT); +#endif +#ifdef ENABLE_TRT_BACKEND + backends.push_back(Backend::TRT); +#endif +#ifdef ENABLE_PADDLE_BACKEND + backends.push_back(Backend::PDINFER); +#endif +#ifdef ENABLE_POROS_BACKEND + backends.push_back(Backend::POROS); +#endif +#ifdef ENABLE_OPENVINO_BACKEND + backends.push_back(Backend::OPENVINO); +#endif +#ifdef ENABLE_LITE_BACKEND + backends.push_back(Backend::LITE); +#endif +#ifdef ENABLE_RKNPU2_BACKEND + backends.push_back(Backend::RKNPU2); +#endif +#ifdef ENABLE_SOPHGO_BACKEND + backends.push_back(Backend::SOPHGOTPU); +#endif + return backends; +} + +bool IsBackendAvailable(const Backend& backend) { + std::vector backends = GetAvailableBackends(); + for (size_t i = 0; i < backends.size(); ++i) { + if (backend == backends[i]) { + return true; + } + } + return false; +} + +bool CheckModelFormat(const std::string& model_file, + const ModelFormat& model_format) { + if (model_format == ModelFormat::PADDLE) { + if (model_file.size() < 8 || + model_file.substr(model_file.size() - 8, 8) != ".pdmodel") { + FDERROR << "With model format of ModelFormat::PADDLE, the model file " + "should ends with `.pdmodel`, but now it's " + << model_file << std::endl; + return false; + } + } else if (model_format == ModelFormat::ONNX) { + if (model_file.size() < 5 || + model_file.substr(model_file.size() - 5, 5) != ".onnx") { + FDERROR << "With model format of ModelFormat::ONNX, the model file " + "should ends with `.onnx`, but now it's " + << model_file << std::endl; + return false; + } + } else if (model_format == ModelFormat::RKNN) { + if (model_file.size() < 5 || + model_file.substr(model_file.size() - 5, 5) != ".rknn") { + FDERROR << "With model format of ModelFormat::RKNN, the model file " + "should ends with `.rknn`, but now it's " + << model_file << std::endl; + return false; + } + } else if (model_format == ModelFormat::TORCHSCRIPT) { + if (model_file.size() < 3 || + model_file.substr(model_file.size() - 3, 3) != ".pt") { + FDERROR + << "With model format of ModelFormat::TORCHSCRIPT, the model file " + "should ends with `.pt`, but now it's " + << model_file << std::endl; + return false; + } + } else if (model_format == ModelFormat::SOPHGO) { + if (model_file.size() < 7 || + model_file.substr(model_file.size() - 7, 7) != ".bmodel") { + FDERROR << "With model format of ModelFormat::SOPHGO, the model file " + "should ends with `.bmodel`, but now it's " + << model_file << std::endl; + return false; + } + } else { + FDERROR + << "Only support model format with frontend ModelFormat::PADDLE / " + "ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT." + << std::endl; + return false; + } + return true; +} + +ModelFormat GuessModelFormat(const std::string& model_file) { + if (model_file.size() > 8 && + model_file.substr(model_file.size() - 8, 8) == ".pdmodel") { + FDINFO << "Model Format: PaddlePaddle." << std::endl; + return ModelFormat::PADDLE; + } else if (model_file.size() > 5 && + model_file.substr(model_file.size() - 5, 5) == ".onnx") { + FDINFO << "Model Format: ONNX." << std::endl; + return ModelFormat::ONNX; + } else if (model_file.size() > 3 && + model_file.substr(model_file.size() - 3, 3) == ".pt") { + FDINFO << "Model Format: Torchscript." 
<< std::endl;
+    return ModelFormat::TORCHSCRIPT;
+  } else if (model_file.size() > 5 &&
+             model_file.substr(model_file.size() - 5, 5) == ".rknn") {
+    FDINFO << "Model Format: RKNN." << std::endl;
+    return ModelFormat::RKNN;
+  } else if (model_file.size() > 7 &&
+             model_file.substr(model_file.size() - 7, 7) == ".bmodel") {
+    FDINFO << "Model Format: SOPHGO." << std::endl;
+    return ModelFormat::SOPHGO;
+  }
+
+  FDERROR << "Cannot guess which model format you are using, please set "
+             "RuntimeOption::model_format manually."
+          << std::endl;
+  return ModelFormat::PADDLE;
+}
+
+void RuntimeOption::SetModelPath(const std::string& model_path,
+                                 const std::string& params_path,
+                                 const ModelFormat& format) {
+  if (format == ModelFormat::PADDLE) {
+    model_file = model_path;
+    params_file = params_path;
+    model_format = ModelFormat::PADDLE;
+  } else if (format == ModelFormat::ONNX) {
+    model_file = model_path;
+    model_format = ModelFormat::ONNX;
+  } else if (format == ModelFormat::TORCHSCRIPT) {
+    model_file = model_path;
+    model_format = ModelFormat::TORCHSCRIPT;
+  } else {
+    FDASSERT(false,
+             "The model format only can be "
+             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
+  }
+}
+
+void RuntimeOption::SetModelBuffer(const char* model_buffer,
+                                   size_t model_buffer_size,
+                                   const char* params_buffer,
+                                   size_t params_buffer_size,
+                                   const ModelFormat& format) {
+  model_buffer_size_ = model_buffer_size;
+  params_buffer_size_ = params_buffer_size;
+  model_from_memory_ = true;
+  if (format == ModelFormat::PADDLE) {
+    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
+    params_buffer_ =
+        std::string(params_buffer, params_buffer + params_buffer_size);
+    model_format = ModelFormat::PADDLE;
+  } else if (format == ModelFormat::ONNX) {
+    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
+    model_format = ModelFormat::ONNX;
+  } else if (format == ModelFormat::TORCHSCRIPT) {
+    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
+    model_format = ModelFormat::TORCHSCRIPT;
+  } else {
+    FDASSERT(false,
+             "The model format only can be "
+             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
+  }
+}
+
+void RuntimeOption::UseGpu(int gpu_id) {
+#ifdef WITH_GPU
+  device = Device::GPU;
+  device_id = gpu_id;
+#else
+  FDWARNING << "The FastDeploy didn't compile with GPU, will force to use CPU."
+            << std::endl;
+  device = Device::CPU;
+#endif
+}
+
+void RuntimeOption::UseCpu() { device = Device::CPU; }
+
+void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
+                              fastdeploy::rknpu2::CoreMask rknpu2_core) {
+  rknpu2_cpu_name_ = rknpu2_name;
+  rknpu2_core_mask_ = rknpu2_core;
+  device = Device::RKNPU;
+}
+
+void RuntimeOption::UseTimVX() {
+  enable_timvx = true;
+  device = Device::TIMVX;
+}
+
+void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
+                                 bool locked, bool autotune,
+                                 const std::string& autotune_file,
+                                 const std::string& precision,
+                                 bool adaptive_seqlen,
+                                 bool enable_multi_stream) {
+  enable_kunlunxin = true;
+  device_id = kunlunxin_id;
+  kunlunxin_l3_workspace_size = l3_workspace_size;
+  kunlunxin_locked = locked;
+  kunlunxin_autotune = autotune;
+  kunlunxin_autotune_file = autotune_file;
+  kunlunxin_precision = precision;
+  kunlunxin_adaptive_seqlen = adaptive_seqlen;
+  kunlunxin_enable_multi_stream = enable_multi_stream;
+  device = Device::KUNLUNXIN;
+}
+
+void RuntimeOption::UseAscend() {
+  enable_ascend = true;
+  device = Device::ASCEND;
+}
+
+void RuntimeOption::UseSophgo() {
+  device = Device::SOPHGOTPUD;
+  UseSophgoBackend();
+}
+
+void RuntimeOption::SetExternalStream(void* external_stream) {
+  external_stream_ = external_stream;
+}
+
+void RuntimeOption::SetCpuThreadNum(int thread_num) {
+  FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
+  cpu_thread_num = thread_num;
+}
+
+void RuntimeOption::SetOrtGraphOptLevel(int level) {
+  std::vector<int> supported_level{-1, 0, 1, 2};
+  auto valid_level = std::find(supported_level.begin(), supported_level.end(),
+                               level) != supported_level.end();
+  FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
+  ort_graph_opt_level = level;
+}
+
+// use paddle inference backend
+void RuntimeOption::UsePaddleBackend() {
+#ifdef ENABLE_PADDLE_BACKEND
+  backend = Backend::PDINFER;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with Paddle Inference.");
+#endif
+}
+
+// use onnxruntime backend
+void RuntimeOption::UseOrtBackend() {
+#ifdef ENABLE_ORT_BACKEND
+  backend = Backend::ORT;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with OrtBackend.");
+#endif
+}
+
+// use sophgoruntime backend
+void RuntimeOption::UseSophgoBackend() {
+#ifdef ENABLE_SOPHGO_BACKEND
+  backend = Backend::SOPHGOTPU;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with SophgoBackend.");
+#endif
+}
+
+// use poros backend
+void RuntimeOption::UsePorosBackend() {
+#ifdef ENABLE_POROS_BACKEND
+  backend = Backend::POROS;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with PorosBackend.");
+#endif
+}
+
+void RuntimeOption::UseTrtBackend() {
+#ifdef ENABLE_TRT_BACKEND
+  backend = Backend::TRT;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with TrtBackend.");
+#endif
+}
+
+void RuntimeOption::UseOpenVINOBackend() {
+#ifdef ENABLE_OPENVINO_BACKEND
+  backend = Backend::OPENVINO;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with OpenVINO.");
+#endif
+}
+
+void RuntimeOption::UseLiteBackend() {
+#ifdef ENABLE_LITE_BACKEND
+  backend = Backend::LITE;
+#else
+  FDASSERT(false, "The FastDeploy didn't compile with Paddle Lite.");
+#endif
+}
+
+void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) {
+  pd_enable_mkldnn = pd_mkldnn;
+}
+
+void RuntimeOption::DeletePaddleBackendPass(const std::string& pass_name) {
+  pd_delete_pass_names.push_back(pass_name);
+}
+void RuntimeOption::EnablePaddleLogInfo() { pd_enable_log_info = true; }
+
+void RuntimeOption::DisablePaddleLogInfo() { pd_enable_log_info = false; }
+
+void RuntimeOption::EnablePaddleToTrt() {
+  FDASSERT(backend == Backend::TRT,
+           "Should call UseTrtBackend() before call EnablePaddleToTrt().");
+#ifdef ENABLE_PADDLE_BACKEND
+  FDINFO << "While using TrtBackend with EnablePaddleToTrt, FastDeploy will "
+            "change to use Paddle Inference Backend."
+         << std::endl;
+  backend = Backend::PDINFER;
+  pd_enable_trt = true;
+#else
+  FDASSERT(false,
+           "While using TrtBackend with EnablePaddleToTrt, require the "
+           "FastDeploy is compiled with Paddle Inference Backend, "
+           "please rebuild your FastDeploy.");
+#endif
+}
+
+void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) {
+  FDASSERT(size > 0, "Parameter size must be greater than 0.");
+  pd_mkldnn_cache_size = size;
+}
+
+void RuntimeOption::SetOpenVINODevice(const std::string& name) {
+  openvino_device = name;
+}
+
+void RuntimeOption::EnableLiteFP16() { lite_enable_fp16 = true; }
+
+void RuntimeOption::DisableLiteFP16() { lite_enable_fp16 = false; }
+void RuntimeOption::EnableLiteInt8() { lite_enable_int8 = true; }
+
+void RuntimeOption::DisableLiteInt8() { lite_enable_int8 = false; }
+void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
+  lite_power_mode = mode;
+}
+
+void RuntimeOption::SetLiteOptimizedModelDir(
+    const std::string& optimized_model_dir) {
+  lite_optimized_model_dir = optimized_model_dir;
+}
+
+void RuntimeOption::SetLiteSubgraphPartitionPath(
+    const std::string& nnadapter_subgraph_partition_config_path) {
+  lite_nnadapter_subgraph_partition_config_path =
+      nnadapter_subgraph_partition_config_path;
+}
+
+void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer(
+    const std::string& nnadapter_subgraph_partition_config_buffer) {
+  lite_nnadapter_subgraph_partition_config_buffer =
+      nnadapter_subgraph_partition_config_buffer;
+}
+
+void RuntimeOption::SetLiteDeviceNames(
+    const std::vector<std::string>& nnadapter_device_names) {
+  lite_nnadapter_device_names = nnadapter_device_names;
+}
+
+void RuntimeOption::SetLiteContextProperties(
+    const std::string& nnadapter_context_properties) {
+  lite_nnadapter_context_properties = nnadapter_context_properties;
+}
+
+void RuntimeOption::SetLiteModelCacheDir(
+    const std::string& nnadapter_model_cache_dir) {
+  lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir;
+}
+
+void RuntimeOption::SetLiteDynamicShapeInfo(
+    const std::map<std::string, std::vector<std::vector<int64_t>>>&
+        nnadapter_dynamic_shape_info) {
+  lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info;
+}
+
+void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath(
+    const std::string& nnadapter_mixed_precision_quantization_config_path) {
+  lite_nnadapter_mixed_precision_quantization_config_path =
+      nnadapter_mixed_precision_quantization_config_path;
+}
+
+void RuntimeOption::SetTrtInputShape(const std::string& input_name,
+                                     const std::vector<int32_t>& min_shape,
+                                     const std::vector<int32_t>& opt_shape,
+                                     const std::vector<int32_t>& max_shape) {
+  trt_min_shape[input_name].clear();
+  trt_max_shape[input_name].clear();
+  trt_opt_shape[input_name].clear();
+  trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end());
+  if (opt_shape.size() == 0) {
+    trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end());
+  } else {
+    trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end());
+  }
+  if (max_shape.size() == 0) {
+    trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end());
+  } else {
+    trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end());
+  }
+}
+
+void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) {
+  trt_max_workspace_size = max_workspace_size;
+}
+void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size) {
+  trt_max_batch_size = max_batch_size;
+}
+
+void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }
+
+void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; }
+
+void RuntimeOption::EnablePinnedMemory() { enable_pinned_memory = true; }
+
+void RuntimeOption::DisablePinnedMemory() { enable_pinned_memory = false; }
+
+void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
+  trt_serialize_file = cache_file_path;
+}
+
+void RuntimeOption::SetOpenVINOStreams(int num_streams) {
+  ov_num_streams = num_streams;
+}
+
+bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
+                      const RuntimeOption& _option) {
+#ifdef ENABLE_POROS_BACKEND
+  option = _option;
+  auto poros_option = PorosBackendOption();
+  poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
+  poros_option.gpu_id = option.device_id;
+  poros_option.long_to_int = option.long_to_int;
+  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
+  poros_option.unconst_ops_thres = option.unconst_ops_thres;
+  poros_option.poros_file = option.poros_file;
+  poros_option.is_dynamic = option.is_dynamic;
+  poros_option.enable_fp16 = option.trt_enable_fp16;
+  poros_option.max_batch_size = option.trt_max_batch_size;
+  poros_option.max_workspace_size = option.trt_max_workspace_size;
+  FDASSERT(
+      option.model_format == ModelFormat::TORCHSCRIPT,
+      "PorosBackend only support model format of ModelFormat::TORCHSCRIPT.");
+  backend_ = utils::make_unique<PorosBackend>();
+  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
+  FDASSERT(
+      casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
+      "Load model from Torchscript failed while initializing PorosBackend.");
+#else
+  FDASSERT(false,
+           "PorosBackend is not available, please compiled with "
+           "ENABLE_POROS_BACKEND=ON.");
+#endif
+  return true;
+}
+
+void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; }
+
+void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; }
+
+void RuntimeOption::DisablePaddleTrtOPs(const std::vector<std::string>& ops) {
+  trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
+}
+
+void RuntimeOption::UseIpu(int device_num, int micro_batch_size,
+                           bool enable_pipelining, int batches_per_step) {
+#ifdef WITH_IPU
+  device = Device::IPU;
+  ipu_device_num = device_num;
+  ipu_micro_batch_size = micro_batch_size;
+  ipu_enable_pipelining = enable_pipelining;
+  ipu_batches_per_step = batches_per_step;
+#else
+  FDWARNING << "The FastDeploy didn't compile with IPU, will force to use CPU."
+            << std::endl;
+  device = Device::CPU;
+#endif
+}
+
+void RuntimeOption::SetIpuConfig(bool enable_fp16, int replica_num,
+                                 float available_memory_proportion,
+                                 bool enable_half_partial) {
+  ipu_enable_fp16 = enable_fp16;
+  ipu_replica_num = replica_num;
+  ipu_available_memory_proportion = available_memory_proportion;
+  ipu_enable_half_partial = enable_half_partial;
+}
+
+} // namespace fastdeploy
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
new file mode 100644
index 000000000..6b1f1caa7
--- /dev/null
+++ b/fastdeploy/runtime/runtime_option.h
@@ -0,0 +1,482 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*! \file runtime_option.h
+    \brief Options used to configure how FastDeploy Runtime loads and runs a model.
+ */
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+#include "fastdeploy/runtime/enum_variables.h"
+#include "fastdeploy/backends/lite/option.h"
+#include "fastdeploy/backends/openvino/option.h"
+#include "fastdeploy/backends/ort/option.h"
+#include "fastdeploy/backends/paddle/option.h"
+#include "fastdeploy/backends/poros/option.h"
+#include "fastdeploy/backends/rknpu2/option.h"
+#include "fastdeploy/backends/sophgo/option.h"
+#include "fastdeploy/backends/tensorrt/option.h"
+
+namespace fastdeploy {
+
+/**
+ * @brief Get all the available inference backends in FastDeploy
+ */
+FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
+
+/**
+ * @brief Check if the inference backend is available
+ */
+FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
+
+bool CheckModelFormat(const std::string& model_file,
+                      const ModelFormat& model_format);
+ModelFormat GuessModelFormat(const std::string& model_file);
+
+/*! @brief Option object used when creating a new Runtime object
+ */
+struct FASTDEPLOY_DECL RuntimeOption {
+  /** \brief Set path of model file and parameter file
+   *
+   * \param[in] model_path Path of model file, e.g. ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model
+   * \param[in] params_path Path of parameter file, this is only used when the model format is Paddle, e.g. Resnet50/model.pdiparams
+   * \param[in] format Format of the loaded model
+   */
+  void SetModelPath(const std::string& model_path,
+                    const std::string& params_path = "",
+                    const ModelFormat& format = ModelFormat::PADDLE);
+
+  /** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
+   *
+   * \param[in] model_buffer The memory buffer of model
+   * \param[in] model_buffer_size The size of the model data
+   * \param[in] params_buffer The memory buffer of the combined parameters file
+   * \param[in] params_buffer_size The size of the combined parameters data
+   * \param[in] format Format of the loaded model
+   */
+  void SetModelBuffer(const char* model_buffer, size_t model_buffer_size,
+                      const char* params_buffer, size_t params_buffer_size,
+                      const ModelFormat& format = ModelFormat::PADDLE);
+
+  /// Use CPU to run inference, the runtime will run on CPU by default
+  void UseCpu();
+
+  /// Use Nvidia GPU to run inference
+  void UseGpu(int gpu_id = 0);
+
+  /// Use RKNPU2 to run inference
+  void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name =
+                     fastdeploy::rknpu2::CpuName::RK3588,
+                 fastdeploy::rknpu2::CoreMask rknpu2_core =
+                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
+
+  /// Use TimVX to run inference
+  void UseTimVX();
+
+  /// Use Huawei Ascend to run inference
+  void UseAscend();
+
+  ///
+  /// \brief Turn on KunlunXin XPU.
+  ///
+  /// \param kunlunxin_id the KunlunXin XPU card to use (default is 0).
+  /// \param l3_workspace_size The size of the video memory allocated by the l3
+  ///     cache, the maximum is 16M.
+  /// \param locked Whether the allocated L3 cache can be locked. If false,
+  ///     it means that the L3 cache is not locked, and the allocated L3
+  ///     cache can be shared by multiple models, and multiple models
+  ///     sharing the L3 cache will be executed sequentially on the card.
+  /// \param autotune Whether to autotune the conv operator in the model. If
+  ///     true, when the conv operator of a certain dimension is executed
+  ///     for the first time, it will automatically search for a better
+  ///     algorithm to improve the performance of subsequent conv operators
+  ///     of the same dimension.
+  /// \param autotune_file Specify the path of the autotune file. If
+  ///     autotune_file is specified, the algorithm specified in the
+  ///     file will be used and autotune will not be performed again.
+  /// \param precision Calculation accuracy of multi_encoder
+  /// \param adaptive_seqlen Whether the input of multi_encoder is variable length
+  /// \param enable_multi_stream Whether to enable the multi stream of
+  ///     KunlunXin XPU.
+  ///
+  void UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00,
+                    bool locked = false, bool autotune = true,
+                    const std::string& autotune_file = "",
+                    const std::string& precision = "int16",
+                    bool adaptive_seqlen = false,
+                    bool enable_multi_stream = false);
+
+  /// Use Sophgo to run inference
+  void UseSophgo();
+
+  void SetExternalStream(void* external_stream);
+
+  /**
+   * @brief Set the number of CPU threads used while running inference on CPU, by default it will be decided by the different backends
+   */
+  void SetCpuThreadNum(int thread_num);
+
+  /// Set ORT graph opt level, the default is decided by ONNX Runtime itself
+  void SetOrtGraphOptLevel(int level = -1);
+
+  /// Set Paddle Inference as inference backend, supports CPU/GPU
+  void UsePaddleBackend();
+
+  /// Wrapper function of UsePaddleBackend()
+  void UsePaddleInferBackend() { return UsePaddleBackend(); }
+
+  /// Set ONNX Runtime as inference backend, supports CPU/GPU
+  void UseOrtBackend();
+
+  /// Set SOPHGO Runtime as inference backend, supports CPU/GPU
+  void UseSophgoBackend();
+
+  /// Set TensorRT as inference backend, only supports GPU
+  void UseTrtBackend();
+
+  /// Set Poros backend as inference backend, supports CPU/GPU
+  void UsePorosBackend();
+
+  /// Set OpenVINO as inference backend, only supports CPU
+  void UseOpenVINOBackend();
+
+  /// Set Paddle Lite as inference backend, only supports Arm CPU
+  void UseLiteBackend();
+
+  /// Wrapper function of UseLiteBackend()
+  void UsePaddleLiteBackend() { return UseLiteBackend(); }
+
+  /// Set MKLDNN switch while using Paddle Inference as inference backend
+  void SetPaddleMKLDNN(bool pd_mkldnn = true);
+
+  /**
+   * @brief If TensorRT backend is used, EnablePaddleToTrt will change to use Paddle Inference backend, and use its integrated TensorRT instead.
+   */
+  void EnablePaddleToTrt();
+
+  /**
+   * @brief Delete pass by name while using Paddle Inference as inference backend, this can be called multiple times to delete a set of passes
+   */
+  void DeletePaddleBackendPass(const std::string& delete_pass_name);
+
+  /**
+   * @brief Enable printing debug information while using Paddle Inference as inference backend, the backend disables the debug information by default
+   */
+  void EnablePaddleLogInfo();
+
+  /**
+   * @brief Disable printing debug information while using Paddle Inference as inference backend
+   */
+  void DisablePaddleLogInfo();
+
+  /**
+   * @brief Set shape cache size while using Paddle Inference with mkldnn, by default it will cache all the different shapes
+   */
+  void SetPaddleMKLDNNCacheSize(int size);
+
+  /**
+   * @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'....
+   */
+  void SetOpenVINODevice(const std::string& name = "CPU");
+
+  /**
+   * @brief Set shape info for OpenVINO
+   */
+  void SetOpenVINOShapeInfo(
+      const std::map<std::string, std::vector<int64_t>>& shape_info) {
+    ov_shape_infos = shape_info;
+  }
+
+  /**
+   * @brief While using the OpenVINO backend with an Intel GPU, use this interface to specify the operators that run on CPU
+   */
+  void SetOpenVINOCpuOperators(const std::vector<std::string>& operators) {
+    ov_cpu_operators = operators;
+  }
+
+  /**
+   * @brief Set optimized model dir for Paddle Lite backend.
+   */
+  void SetLiteOptimizedModelDir(const std::string& optimized_model_dir);
+
+  /**
+   * @brief Set subgraph partition config path for Paddle Lite backend.
+   */
+  void SetLiteSubgraphPartitionPath(
+      const std::string& nnadapter_subgraph_partition_config_path);
+
+  /**
+   * @brief Set subgraph partition config buffer for Paddle Lite backend.
+   */
+  void SetLiteSubgraphPartitionConfigBuffer(
+      const std::string& nnadapter_subgraph_partition_config_buffer);
+
+  /**
+   * @brief Set device names for Paddle Lite backend.
+   */
+  void
+  SetLiteDeviceNames(const std::vector<std::string>& nnadapter_device_names);
+
+  /**
+   * @brief Set context properties for Paddle Lite backend.
+   */
+  void
+  SetLiteContextProperties(const std::string& nnadapter_context_properties);
+
+  /**
+   * @brief Set model cache dir for Paddle Lite backend.
+   */
+  void SetLiteModelCacheDir(const std::string& nnadapter_model_cache_dir);
+
+  /**
+   * @brief Set dynamic shape info for Paddle Lite backend.
+   */
+  void SetLiteDynamicShapeInfo(
+      const std::map<std::string, std::vector<std::vector<int64_t>>>&
+          nnadapter_dynamic_shape_info);
+
+  /**
+   * @brief Set mixed precision quantization config path for Paddle Lite backend.
+   */
+  void SetLiteMixedPrecisionQuantizationConfigPath(
+      const std::string& nnadapter_mixed_precision_quantization_config_path);
+
+  /**
+   * @brief Enable half precision while using the Paddle Lite backend
+   */
+  void EnableLiteFP16();
+
+  /**
+   * @brief Disable half precision, change to full precision (float32)
+   */
+  void DisableLiteFP16();
+
+  /**
+   * @brief Enable int8 precision while using the Paddle Lite backend
+   */
+  void EnableLiteInt8();
+
+  /**
+   * @brief Disable int8 precision, change to full precision (float32)
+   */
+  void DisableLiteInt8();
+
+  /**
+   * @brief Set power mode while using Paddle Lite as inference backend, mode(0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND, 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW, refer [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details)
+   */
+  void SetLitePowerMode(LitePowerMode mode);
+
+  /** \brief Set shape range of input tensor for a model that contains dynamic input shapes while using TensorRT backend
+   *
+   * \param[in] input_name The name of the input of the model which has dynamic shape
+   * \param[in] min_shape The minimal shape for the input tensor
+   * \param[in] opt_shape The optimized shape for the input tensor, just set the most common shape, if set as the default value, it will keep the same as min_shape
+   * \param[in] max_shape The maximum shape for the input tensor, if set as the default value, it will keep the same as min_shape
+   */
+  void SetTrtInputShape(
+      const std::string& input_name, const std::vector<int32_t>& min_shape,
+      const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
+      const std::vector<int32_t>& max_shape = std::vector<int32_t>());
+
+  /// Set max_workspace_size for TensorRT, default 1<<30
+  void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
+
+  /// Set max_batch_size for TensorRT, default 32
+  void SetTrtMaxBatchSize(size_t max_batch_size);
+
+  /**
+   * @brief Enable FP16 inference while using TensorRT backend. Notice: not all GPU devices support FP16; on devices that don't support FP16, FastDeploy will fall back to FP32 automatically
+   */
+  void EnableTrtFP16();
+
+  /// Disable FP16 inference while using TensorRT backend
+  void DisableTrtFP16();
+
+  /**
+   * @brief Set cache file path while using TensorRT backend. Loading a Paddle/ONNX model and initializing TensorRT can take a long time; with this interface the TensorRT engine will be saved to `cache_file_path` and loaded directly the next time the code is executed
+   */
+  void SetTrtCacheFile(const std::string& cache_file_path);
+
+  /**
+   * @brief Enable pinned memory. Pinned memory can be utilized to speed up the data transfer between CPU and GPU. Currently it's only supported in the TRT backend and the Paddle Inference backend.
+   */
+  void EnablePinnedMemory();
+
+  /**
+   * @brief Disable pinned memory
+   */
+  void DisablePinnedMemory();
+
+  /**
+   * @brief Enable shape collecting in the Paddle-TRT backend
+   */
+  void EnablePaddleTrtCollectShape();
+
+  /**
+   * @brief Disable shape collecting in the Paddle-TRT backend
+   */
+  void DisablePaddleTrtCollectShape();
+
+  /**
+   * @brief Prevent the given ops from running in the Paddle-TRT backend
+   */
+  void DisablePaddleTrtOPs(const std::vector<std::string>& ops);
+
+  /**
+   * @brief Set the number of streams used by the OpenVINO backend
+   */
+  void SetOpenVINOStreams(int num_streams);
+
+  /** \brief Use Graphcore IPU to run inference.
+   *
+   * \param[in] device_num the number of IPUs.
+   * \param[in] micro_batch_size the batch size in the graph, only works when the graph has no batch shape info.
+   * \param[in] enable_pipelining enable pipelining.
+   * \param[in] batches_per_step the number of batches per run in pipelining.
+   */
+  void UseIpu(int device_num = 1, int micro_batch_size = 1,
+              bool enable_pipelining = false, int batches_per_step = 1);
+
+  /** \brief Set IPU config.
+   *
+   * \param[in] enable_fp16 enable fp16.
+   * \param[in] replica_num the number of graph replication.
+   * \param[in] available_memory_proportion the available memory proportion for matmul/conv.
+   * \param[in] enable_half_partial enable fp16 partial for matmul, only works with fp16.
+   */
+  void SetIpuConfig(bool enable_fp16 = false, int replica_num = 1,
+                    float available_memory_proportion = 1.0,
+                    bool enable_half_partial = false);
+
+  Backend backend = Backend::UNKNOWN;
+  // for cpu inference and preprocess
+  // the default value will let each backend choose its own default value
+  int cpu_thread_num = -1;
+  int device_id = 0;
+
+  Device device = Device::CPU;
+
+  void* external_stream_ = nullptr;
+
+  bool enable_pinned_memory = false;
+
+  // ======Only for ORT Backend========
+  // -1 means use default value by ort
+  // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
+  // ORT_ENABLE_ALL
+  int ort_graph_opt_level = -1;
+  int ort_inter_op_num_threads = -1;
+  // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
+  int ort_execution_mode = -1;
+
+  // ======Only for Paddle Backend=====
+  bool pd_enable_mkldnn = true;
+  bool pd_enable_log_info = false;
+  bool pd_enable_trt = false;
+  bool pd_collect_shape = false;
+  int pd_mkldnn_cache_size = 1;
+  std::vector<std::string> pd_delete_pass_names;
+
+  // ======Only for Paddle IPU Backend =======
+  int ipu_device_num = 1;
+  int ipu_micro_batch_size = 1;
+  bool ipu_enable_pipelining = false;
+  int ipu_batches_per_step = 1;
+  bool ipu_enable_fp16 = false;
+  int ipu_replica_num = 1;
+  float ipu_available_memory_proportion = 1.0;
+  bool ipu_enable_half_partial = false;
+
+  // ======Only for Paddle Lite Backend=====
+  // 0: LITE_POWER_HIGH 1: LITE_POWER_LOW 2: LITE_POWER_FULL
+  // 3: LITE_POWER_NO_BIND 4: LITE_POWER_RAND_HIGH
+  // 5: LITE_POWER_RAND_LOW
+  LitePowerMode lite_power_mode = LitePowerMode::LITE_POWER_NO_BIND;
+  // enable int8 or not
+  bool lite_enable_int8 = false;
+  // enable fp16 or not
+  bool lite_enable_fp16 = false;
+  // optimized model dir for CxxConfig
+  std::string lite_optimized_model_dir = "";
+  std::string lite_nnadapter_subgraph_partition_config_path = "";
+  // and other nnadapter settings for CxxConfig
+  std::string lite_nnadapter_subgraph_partition_config_buffer = "";
+  std::string lite_nnadapter_context_properties = "";
+  std::string lite_nnadapter_model_cache_dir = "";
+  std::string lite_nnadapter_mixed_precision_quantization_config_path = "";
+  std::map<std::string, std::vector<std::vector<int64_t>>>
+      lite_nnadapter_dynamic_shape_info = {{"", {{0}}}};
+  std::vector<std::string> lite_nnadapter_device_names = {};
+
+  bool enable_timvx = false;
+  bool enable_ascend = false;
+  bool enable_kunlunxin = false;
+
+  // ======Only for Trt Backend=======
+  std::map<std::string, std::vector<int32_t>> trt_max_shape;
+  std::map<std::string, std::vector<int32_t>> trt_min_shape;
+  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
+  std::string trt_serialize_file = "";
+  bool trt_enable_fp16 = false;
+  bool trt_enable_int8 = false;
+  size_t trt_max_batch_size = 1;
+  size_t trt_max_workspace_size = 1 << 30;
+  // ======Only for PaddleTrt Backend=======
+  std::vector<std::string> trt_disabled_ops_{};
+
+  // ======Only for Poros Backend=======
+  bool is_dynamic = false;
+  bool long_to_int = true;
+  bool use_nvidia_tf32 = false;
+  int unconst_ops_thres = -1;
+  std::string poros_file = "";
+
+  // ======Only for OpenVINO Backend=======
+  int ov_num_streams = 0;
+  std::string openvino_device = "CPU";
+  std::map<std::string, std::vector<int64_t>> ov_shape_infos;
+  std::vector<std::string> ov_cpu_operators;
+
+  // ======Only for RKNPU2 Backend=======
+  fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ =
+      fastdeploy::rknpu2::CpuName::RK3588;
+  fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ =
+      fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
+
+  // ======Only for KunlunXin XPU Backend=======
+  int kunlunxin_l3_workspace_size = 0xfffc00;
+  bool kunlunxin_locked = false;
+  bool kunlunxin_autotune = true;
+  std::string kunlunxin_autotune_file = "";
+  std::string kunlunxin_precision = "int16";
+  bool kunlunxin_adaptive_seqlen = false;
+  bool kunlunxin_enable_multi_stream = false;
+
+  std::string model_file = "";   // Path of model file
+  std::string params_file = "";  // Path of parameters file, can be empty
+  // format of input model
+  ModelFormat model_format = ModelFormat::PADDLE;
+
+  std::string model_buffer_ = "";
+  std::string params_buffer_ = "";
+  size_t model_buffer_size_ = 0;
+  size_t params_buffer_size_ = 0;
+  bool model_from_memory_ = false;
+};
+
+} // namespace fastdeploy
diff --git a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc
index cb6a460c8..1c93ef5b4 100755
--- a/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc
+++ b/fastdeploy/vision/detection/contrib/yolov7end2end_trt.cc
@@ -75,14 +75,14 @@ YOLOv7End2EndTRT::YOLOv7End2EndTRT(const std::string& model_file,
   runtime_option.model_format = model_format;
   runtime_option.model_file = model_file;
   if (runtime_option.device != Device::GPU) {
-    FDWARNING << Str(runtime_option.device)
+    FDWARNING << runtime_option.device
               << " is not support for YOLOv7End2EndTRT,"
               << "will fallback to Device::GPU." << std::endl;
     runtime_option.device = Device::GPU;
   }
   if (runtime_option.backend != Backend::UNKNOWN) {
     if (runtime_option.backend != Backend::TRT) {
-      FDWARNING << Str(runtime_option.backend)
+      FDWARNING << runtime_option.backend
                 << " is not support for YOLOv7End2EndTRT,"
                 << "will fallback to Backend::TRT." << std::endl;
       runtime_option.backend = Backend::TRT;
@@ -347,4 +347,4 @@ bool YOLOv7End2EndTRT::Predict(cv::Mat* im, DetectionResult* result,
 } // namespace detection
 } // namespace vision
-} // namespace fastdeploy
\ No newline at end of file
+} // namespace fastdeploy
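Reviewer note (not part of the diff): a minimal sketch of how the RuntimeOption API gathered in runtime_option.h is typically driven. The model path, input name, shapes, and cache file name are illustrative only, and it assumes the new fastdeploy/runtime/runtime.h keeps exposing Runtime::Init with its pre-refactor signature.

    #include "fastdeploy/runtime/runtime.h"

    int main() {
      fastdeploy::RuntimeOption option;
      // Paddle format models need both the model file and the params file.
      option.SetModelPath("ResNet50/model.pdmodel", "ResNet50/model.pdiparams",
                          fastdeploy::ModelFormat::PADDLE);
      option.UseGpu(0);        // Falls back to CPU when built without WITH_GPU.
      option.UseTrtBackend();  // Requires ENABLE_TRT_BACKEND=ON.
      option.EnableTrtFP16();
      // With only min_shape given, opt_shape and max_shape default to min_shape.
      option.SetTrtInputShape("inputs", {1, 3, 224, 224});
      option.SetTrtCacheFile("resnet50.trt");  // Reuse the serialized engine.

      fastdeploy::Runtime runtime;
      return runtime.Init(option) ? 0 : -1;  // Init is assumed unchanged by this patch.
    }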
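A second sketch for the SetModelBuffer / model_from_memory_ path added in runtime_option.cc above, loading an ONNX model that is already held in memory; the file read is only a stand-in for however the buffer is actually obtained.

    #include <fstream>
    #include <iterator>
    #include <string>

    #include "fastdeploy/runtime/runtime_option.h"

    int main() {
      // Read the model into a memory buffer (illustrative source of the bytes).
      std::ifstream fin("model.onnx", std::ios::binary);
      std::string buf((std::istreambuf_iterator<char>(fin)),
                      std::istreambuf_iterator<char>());

      fastdeploy::RuntimeOption option;
      // ONNX carries its weights inside the model buffer, so no params buffer is passed.
      option.SetModelBuffer(buf.data(), buf.size(), nullptr, 0,
                            fastdeploy::ModelFormat::ONNX);
      option.UseCpu();
      option.UseOrtBackend();  // Requires ENABLE_ORT_BACKEND=ON.
      return 0;
    }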