Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-06 00:57:33 +08:00)
[Other] Upgrade runtime module (#1068)
* Upgrade runtime module
* Update option.h
* Fix build error
* Move enumerates
* little modification
* little modification
* little modification
* Remove some useless flags
@@ -71,15 +71,12 @@ option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF)
 option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF)
 option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF)
 option(WITH_TESTING "Whether to compile with unittest." OFF)
 
 ############################# Options for Android cross compiling #########################
 option(WITH_OPENCV_STATIC "Use OpenCV static lib for Android." OFF)
 option(WITH_LITE_STATIC "Use Paddle Lite static lib for Android." OFF)
 option(WITH_OPENMP "Use OpenMP support for Android." OFF)
 
-# Please don't open this flag now, some bugs exists.
-# Only support Linux Now
-# option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF)
 
 # Whether to build fastdeploy with vision/text/... examples, only for testings.
 option(BUILD_EXAMPLES "Whether to build fastdeploy with vision examples" OFF)
 
@@ -187,7 +184,6 @@ add_definitions(-DFASTDEPLOY_LIB)
 configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h)
 configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc)
 file(GLOB_RECURSE ALL_DEPLOY_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*.cc)
-file(GLOB_RECURSE FDTENSOR_FUNC_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cc)
 file(GLOB_RECURSE FDTENSOR_FUNC_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/function/*.cu)
 file(GLOB_RECURSE DEPLOY_OP_CUDA_KERNEL_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/op_cuda_kernels/*.cu)
 file(GLOB_RECURSE DEPLOY_ORT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/ort/*.cc)
@@ -195,7 +191,7 @@ file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fas
 file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
 file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
 file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
-file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu/rknpu2/*.cc)
+file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu2/*.cc)
 file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/sophgo/*.cc)
 file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
 file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
@@ -420,15 +416,6 @@ endif()
 if(ENABLE_VISION)
 add_definitions(-DENABLE_VISION)
 add_definitions(-DENABLE_VISION_VISUALIZE)
-if(ENABLE_OPENCV_CUDA)
-if(NOT WITH_GPU)
-message(FATAL_ERROR "ENABLE_OPENCV_CUDA is available on Linux and WITH_GPU=ON, but now WITH_GPU=OFF.")
-endif()
-if(APPLE OR ANDROID OR IOS OR WIN32)
-message(FATAL_ERROR "Cannot enable opencv with cuda in mac/ios/android/windows os, please set -DENABLE_OPENCV_CUDA=OFF.")
-endif()
-add_definitions(-DENABLE_OPENCV_CUDA)
-endif()
 add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/yaml-cpp)
 list(APPEND DEPEND_LIBS yaml-cpp)
 if(BUILD_CUDA_SRC)
@@ -20,7 +20,6 @@ set(PADDLEINFERENCE_VERSION @PADDLEINFERENCE_VERSION@)
 set(OPENVINO_VERSION @OPENVINO_VERSION@)
 set(WITH_LITE_STATIC @WITH_LITE_STATIC@)
 set(WITH_OPENCV_STATIC @WITH_OPENCV_STATIC@)
-# set(ENABLE_OPENCV_CUDA @ENABLE_OPENCV_CUDA@)
 set(OPENCV_FILENAME @OPENCV_FILENAME@)
 set(OPENVINO_FILENAME @OPENVINO_FILENAME@)
 set(PADDLELITE_FILENAME @PADDLELITE_FILENAME@)
@@ -42,12 +42,6 @@ else()
 if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
 set(OPENCV_FILENAME "opencv-linux-aarch64-3.4.14")
 endif()
-if(ENABLE_OPENCV_CUDA)
-if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
-message(FATAL_ERROR "Cannot set ENABLE_OPENCV_CUDA=ON while in linux-aarch64 platform.")
-endif()
-set(OPENCV_FILENAME "opencv-linux-x64-gpu-3.4.16")
-endif()
 endif()
 
 if(NOT OPENCV_FILENAME)
@@ -29,11 +29,6 @@ if(${WITH_GPU})
 set(WITH_GPU OFF)
 endif()
 
-if(${ENABLE_OPENCV_CUDA})
-message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_OPENCV_CUDA=OFF")
-set(ENABLE_OPENCV_CUDA OFF)
-endif()
-
 if(${ENABLE_TEXT})
 set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE)
 message(STATUS "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_TEXT=OFF")
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include "fastdeploy/core/fd_type.h"
 #include <iostream>
 #include <memory>
 #include <string>
@@ -21,6 +22,16 @@
 #include <map>
 namespace fastdeploy {
 
+/*! Paddle Lite power mode for mobile device. */
+enum LitePowerMode {
+LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode
+LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode
+LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode
+LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode
+LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode
+LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode
+};
+
 struct LiteBackendOption {
 // cpu num threads
 int threads = 1;
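For context, the power mode moved into this option header is consumed through RuntimeOption::SetLitePowerMode, which appears further down in this diff. A minimal usage sketch, assuming a build of this revision with the Paddle Lite backend enabled:

    // Sketch only, not part of this commit: pick a Paddle Lite power mode.
    #include "fastdeploy/runtime.h"

    int main() {
      fastdeploy::RuntimeOption option;
      option.UseLiteBackend();  // Paddle Lite backend (ARM CPU)
      // Default is LITE_POWER_NO_BIND; LITE_POWER_HIGH requests the high power mode.
      option.SetLitePowerMode(fastdeploy::LITE_POWER_HIGH);
      return 0;
    }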
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include "fastdeploy/core/fd_type.h"
 #include <iostream>
 #include <memory>
 #include <string>
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include "fastdeploy/core/fd_type.h"
 #include <iostream>
 #include <memory>
 #include <string>
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include "fastdeploy/core/fd_type.h"
 #include <iostream>
 #include <memory>
 #include <string>
@@ -31,6 +31,8 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
 config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
 auto precision = paddle_infer::PrecisionType::kFloat32;
 if (option.trt_option.enable_fp16) {
+FDINFO << "Will try to use tensorrt fp16 inference with Paddle Backend."
+<< std::endl;
 precision = paddle_infer::PrecisionType::kHalf;
 }
 bool use_static = false;
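The new log line above fires when TensorRT FP16 is requested through the Paddle Inference backend. A minimal sketch of a configuration that exercises this path, assuming a GPU build of this revision:

    // Sketch only, not part of this commit: route TensorRT through Paddle
    // Inference and request FP16, which is the condition logged above.
    #include "fastdeploy/runtime.h"

    int main() {
      fastdeploy::RuntimeOption option;
      option.UseGpu(0);
      option.UsePaddleBackend();   // Paddle Inference backend
      option.EnablePaddleToTrt();  // use Paddle Inference's integrated TensorRT
      option.EnableTrtFP16();      // GPUs without FP16 support fall back to FP32
      return 0;
    }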
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include "fastdeploy/core/fd_type.h"
 #include <iostream>
 #include <memory>
 #include <string>
@@ -11,7 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
+#include "fastdeploy/backends/rknpu2/rknpu2_backend.h"
 #include "fastdeploy/utils/perf.h"
 namespace fastdeploy {
 RKNPU2Backend::~RKNPU2Backend() {
@@ -478,4 +478,4 @@ RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) {
 FDERROR << "rknn_tensor_type don't support this type" << std::endl;
 return RKNN_TENSOR_TYPE_MAX;
 }
 } // namespace fastdeploy
@@ -14,7 +14,7 @@
 #pragma once
 
 #include "fastdeploy/backends/backend.h"
-#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h"
+#include "fastdeploy/backends/rknpu2/option.h"
 #include "fastdeploy/core/fd_tensor.h"
 #include "rknn_api.h" // NOLINT
 #include <cstring>
@@ -13,6 +13,7 @@
 // limitations under the License.
 #pragma once
 
+#include "fastdeploy/core/fd_type.h"
 #include <cstring>
 #include <iostream>
 #include <memory>
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include "fastdeploy/core/fd_type.h"
 #include <iostream>
 #include <map>
 #include <string>
@@ -57,10 +57,6 @@
 #cmakedefine ENABLE_TEXT
 #endif
 
-#ifndef ENABLE_OPENCV_CUDA
-#cmakedefine ENABLE_OPENCV_CUDA
-#endif
-
 #ifdef ENABLE_VISION
 #ifndef ENABLE_VISION_VISUALIZE
 #define ENABLE_VISION_VISUALIZE
@@ -21,11 +21,11 @@
 #include "fastdeploy/core/allocate.h"
 #include "fastdeploy/core/fd_scalar.h"
 #include "fastdeploy/core/fd_type.h"
+#include "fastdeploy/runtime/enum_variables.h"
 
 namespace fastdeploy {
 
 struct FASTDEPLOY_DECL FDTensor {
 
 // std::vector<int8_t> data;
 void* buffer_ = nullptr;
 std::vector<int64_t> shape = {0};
fastdeploy/core/fd_type.cc (155 lines changed; Executable file → Normal file)
@@ -44,70 +44,6 @@ int FDDataTypeSize(const FDDataType& data_type) {
 return -1;
 }
 
-std::string Str(const Device& d) {
-std::string out;
-switch (d) {
-case Device::CPU:
-out = "Device::CPU";
-break;
-case Device::GPU:
-out = "Device::GPU";
-break;
-case Device::RKNPU:
-out = "Device::RKNPU";
-break;
-case Device::SOPHGOTPUD:
-out = "Device::SOPHGOTPUD";
-break;
-case Device::IPU:
-out = "Device::IPU";
-break;
-case Device::TIMVX:
-out = "Device::TIMVX";
-break;
-case Device::ASCEND:
-out = "Device::ASCEND";
-break;
-case Device::KUNLUNXIN:
-out = "Device::KUNLUNXIN";
-break;
-default:
-out = "Device::UNKOWN";
-}
-return out;
-}
-
-std::ostream& operator<<(std::ostream& out,const Device& d){
-switch (d) {
-case Device::CPU:
-out << "Device::CPU";
-break;
-case Device::GPU:
-out << "Device::GPU";
-break;
-case Device::RKNPU:
-out << "Device::RKNPU";
-break;
-case Device::SOPHGOTPUD:
-out << "Device::SOPHGOTPUD";
-break;
-case Device::TIMVX:
-out << "Device::TIMVX";
-break;
-case Device::KUNLUNXIN:
-out << "Device::KUNLUNXIN";
-break;
-case Device::ASCEND:
-out << "Device::ASCEND";
-break;
-default:
-out << "Device::UNKOWN";
-}
-return out;
-}
-
 std::string Str(const FDDataType& fdt) {
 std::string out;
 switch (fdt) {
@@ -144,37 +80,37 @@ std::string Str(const FDDataType& fdt) {
 return out;
 }
 
-std::ostream& operator<<(std::ostream& out,const FDDataType& fdt){
+std::ostream& operator<<(std::ostream& out, const FDDataType& fdt) {
 switch (fdt) {
 case FDDataType::BOOL:
 out << "FDDataType::BOOL";
 break;
 case FDDataType::INT16:
 out << "FDDataType::INT16";
 break;
 case FDDataType::INT32:
 out << "FDDataType::INT32";
 break;
 case FDDataType::INT64:
 out << "FDDataType::INT64";
 break;
 case FDDataType::FP32:
 out << "FDDataType::FP32";
 break;
 case FDDataType::FP64:
 out << "FDDataType::FP64";
 break;
 case FDDataType::FP16:
 out << "FDDataType::FP16";
 break;
 case FDDataType::UINT8:
 out << "FDDataType::UINT8";
 break;
 case FDDataType::INT8:
 out << "FDDataType::INT8";
 break;
 default:
 out << "FDDataType::UNKNOWN";
 }
 return out;
 }
@@ -206,35 +142,4 @@ const FDDataType TypeToDataType<uint8_t>::dtype = UINT8;
 template <>
 const FDDataType TypeToDataType<int8_t>::dtype = INT8;
 
-std::string Str(const ModelFormat& f) {
-if (f == ModelFormat::PADDLE) {
-return "ModelFormat::PADDLE";
-} else if (f == ModelFormat::ONNX) {
-return "ModelFormat::ONNX";
-} else if (f == ModelFormat::RKNN) {
-return "ModelFormat::RKNN";
-} else if (f == ModelFormat::SOPHGO) {
-return "ModelFormat::SOPHGO";
-} else if (f == ModelFormat::TORCHSCRIPT) {
-return "ModelFormat::TORCHSCRIPT";
-}
-return "UNKNOWN-ModelFormat";
-}
-
-std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
-if (format == ModelFormat::PADDLE) {
-out << "ModelFormat::PADDLE";
-} else if (format == ModelFormat::ONNX) {
-out << "ModelFormat::ONNX";
-} else if (format == ModelFormat::RKNN) {
-out << "ModelFormat::RKNN";
-} else if (format == ModelFormat::SOPHGO) {
-out << "ModelFormat::SOPHGO";
-} else if (format == ModelFormat::TORCHSCRIPT) {
-out << "ModelFormat::TORCHSCRIPT";
-}
-out << "UNKNOWN-ModelFormat";
-return out;
-}
-
 } // namespace fastdeploy
@@ -22,11 +22,6 @@
 
 namespace fastdeploy {
 
-enum FASTDEPLOY_DECL Device {CPU, GPU, RKNPU, IPU, TIMVX, KUNLUNXIN, ASCEND,
-SOPHGOTPUD};
-
-FASTDEPLOY_DECL std::string Str(const Device& d);
-
 enum FASTDEPLOY_DECL FDDataType {
 BOOL,
 INT16,
@@ -52,7 +47,6 @@ enum FASTDEPLOY_DECL FDDataType {
 INT8
 };
 
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d);
 
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
 const FDDataType& fdt);
@@ -66,17 +60,4 @@ struct FASTDEPLOY_DECL TypeToDataType {
 static const FDDataType dtype;
 };
 
-/*! Deep learning model format */
-enum ModelFormat {
-AUTOREC, ///< Auto recognize the model format by model file name
-PADDLE, ///< Model with paddlepaddle format
-ONNX, ///< Model with ONNX format
-RKNN, ///< Model with RKNN format
-TORCHSCRIPT, ///< Model with TorchScript format
-SOPHGO, ///< Model with SOPHGO format
-};
-
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
-const ModelFormat& format);
-
 } // namespace fastdeploy
@@ -121,9 +121,7 @@ class FASTDEPLOY_DECL FastDeployModel {
 std::vector<FDTensor>().swap(reused_output_tensors_);
 }
 
-virtual fastdeploy::Runtime* CloneRuntime() {
-return runtime_->Clone();
-}
+virtual fastdeploy::Runtime* CloneRuntime() { return runtime_->Clone(); }
 
 virtual bool SetRuntime(fastdeploy::Runtime* clone_runtime) {
 runtime_ = std::unique_ptr<Runtime>(clone_runtime);
@@ -21,7 +21,7 @@
 
 #include <type_traits>
 
-#include "fastdeploy/runtime.h"
+#include "fastdeploy/runtime/runtime.h"
 
 #ifdef ENABLE_VISION
 #include "fastdeploy/vision.h"
@@ -11,23 +11,27 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h"
+#include "fastdeploy/backends/rknpu2/option.h"
 #include "fastdeploy/pybind/main.h"
 namespace fastdeploy {
 void BindRKNPU2Config(pybind11::module& m) {
-pybind11::enum_<fastdeploy::rknpu2::CpuName>(m, "CpuName", pybind11::arithmetic(),
-"CpuName for inference.")
+pybind11::enum_<fastdeploy::rknpu2::CpuName>(
+m, "CpuName", pybind11::arithmetic(), "CpuName for inference.")
 .value("RK356X", fastdeploy::rknpu2::CpuName::RK356X)
 .value("RK3588", fastdeploy::rknpu2::CpuName::RK3588)
 .value("UNDEFINED", fastdeploy::rknpu2::CpuName::UNDEFINED);
-pybind11::enum_<fastdeploy::rknpu2::CoreMask>(m, "CoreMask", pybind11::arithmetic(),
-"CoreMask for inference.")
+pybind11::enum_<fastdeploy::rknpu2::CoreMask>(
+m, "CoreMask", pybind11::arithmetic(), "CoreMask for inference.")
-.value("RKNN_NPU_CORE_AUTO", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO)
+.value("RKNN_NPU_CORE_AUTO",
+fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO)
 .value("RKNN_NPU_CORE_0", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0)
 .value("RKNN_NPU_CORE_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_1)
 .value("RKNN_NPU_CORE_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_2)
-.value("RKNN_NPU_CORE_0_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1)
-.value("RKNN_NPU_CORE_0_1_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2)
-.value("RKNN_NPU_CORE_UNDEFINED", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED);
+.value("RKNN_NPU_CORE_0_1",
+fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1)
+.value("RKNN_NPU_CORE_0_1_2",
+fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2)
+.value("RKNN_NPU_CORE_UNDEFINED",
+fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED);
 }
 } // namespace fastdeploy
(File diff suppressed because it is too large.)
@@ -19,573 +19,5 @@
 */
 
 #pragma once
+#include "fastdeploy/core/config.h"
+#include "fastdeploy/runtime/runtime.h"
-#include <algorithm>
-#include <map>
-#include <vector>
-
-#include "backends/rknpu/rknpu2/rknpu2_config.h"
-#include "fastdeploy/backends/backend.h"
-#include "fastdeploy/utils/perf.h"
-
-/** \brief All C++ FastDeploy APIs are defined inside this namespace
-*
-*/
-namespace fastdeploy {
-
-/*! Inference backend supported in FastDeploy */
-enum Backend {
-UNKNOWN, ///< Unknown inference backend
-ORT, ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
-TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
-PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
-POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU
-OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
-LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only
-RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only
-SOPHGOTPU, ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only
-};
-
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
-const Backend& backend);
-
-/*! Paddle Lite power mode for mobile device. */
-enum LitePowerMode {
-LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode
-LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode
-LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode
-LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode
-LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode
-LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode
-};
-
-FASTDEPLOY_DECL std::string Str(const Backend& b);
-FASTDEPLOY_DECL std::string Str(const ModelFormat& f);
-
-/**
-* @brief Get all the available inference backend in FastDeploy
-*/
-FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
-
-/**
-* @brief Check if the inference backend available
-*/
-FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
-
-bool CheckModelFormat(const std::string& model_file,
-const ModelFormat& model_format);
-ModelFormat GuessModelFormat(const std::string& model_file);
-
-/*! @brief Option object used when create a new Runtime object
-*/
-struct FASTDEPLOY_DECL RuntimeOption {
-/** \brief Set path of model file and parameter file
-*
-* \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model
-* \param[in] params_path Path of parameter file, this only used when the model format is Paddle, e.g Resnet50/model.pdiparams
-* \param[in] format Format of the loaded model
-*/
-void SetModelPath(const std::string& model_path,
-const std::string& params_path = "",
-const ModelFormat& format = ModelFormat::PADDLE);
-
-/** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
-*
-* \param[in] model_buffer The memory buffer of model
-* \param[in] model_buffer_size The size of the model data
-* \param[in] params_buffer The memory buffer of the combined parameters file
-* \param[in] params_buffer_size The size of the combined parameters data
-* \param[in] format Format of the loaded model
-*/
-void SetModelBuffer(const char * model_buffer,
-size_t model_buffer_size,
-const char * params_buffer,
-size_t params_buffer_size,
-const ModelFormat& format = ModelFormat::PADDLE);
-
-/// Use cpu to inference, the runtime will inference on CPU by default
-void UseCpu();
-
-/// Use Nvidia GPU to inference
-void UseGpu(int gpu_id = 0);
-
-void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name =
-fastdeploy::rknpu2::CpuName::RK3588,
-fastdeploy::rknpu2::CoreMask rknpu2_core =
-fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
-
-/// Use TimVX to inference
-void UseTimVX();
-
-/// Use Huawei Ascend to inference
-void UseAscend();
-
-///
-/// \brief Turn on KunlunXin XPU.
-///
-/// \param kunlunxin_id the KunlunXin XPU card to use (default is 0).
-/// \param l3_workspace_size The size of the video memory allocated by the l3
-/// cache, the maximum is 16M.
-/// \param locked Whether the allocated L3 cache can be locked. If false,
-/// it means that the L3 cache is not locked, and the allocated L3
-/// cache can be shared by multiple models, and multiple models
-/// sharing the L3 cache will be executed sequentially on the card.
-/// \param autotune Whether to autotune the conv operator in the model. If
-/// true, when the conv operator of a certain dimension is executed
-/// for the first time, it will automatically search for a better
-/// algorithm to improve the performance of subsequent conv operators
-/// of the same dimension.
-/// \param autotune_file Specify the path of the autotune file. If
-/// autotune_file is specified, the algorithm specified in the
-/// file will be used and autotune will not be performed again.
-/// \param precision Calculation accuracy of multi_encoder
-/// \param adaptive_seqlen Is the input of multi_encoder variable length
-/// \param enable_multi_stream Whether to enable the multi stream of
-/// KunlunXin XPU.
-///
-void UseKunlunXin(int kunlunxin_id = 0,
-int l3_workspace_size = 0xfffc00,
-bool locked = false,
-bool autotune = true,
-const std::string& autotune_file = "",
-const std::string& precision = "int16",
-bool adaptive_seqlen = false,
-bool enable_multi_stream = false);
-
-/// Use Sophgo to inference
-void UseSophgo();
-
-void SetExternalStream(void* external_stream);
-
-/*
-* @brief Set number of cpu threads while inference on CPU, by default it will decided by the different backends
-*/
-void SetCpuThreadNum(int thread_num);
-
-/// Set ORT graph opt level, default is decide by ONNX Runtime itself
-void SetOrtGraphOptLevel(int level = -1);
-
-/// Set Paddle Inference as inference backend, support CPU/GPU
-void UsePaddleBackend();
-
-/// Wrapper function of UsePaddleBackend()
-void UsePaddleInferBackend() { return UsePaddleBackend(); }
-
-/// Set ONNX Runtime as inference backend, support CPU/GPU
-void UseOrtBackend();
-
-/// Set SOPHGO Runtime as inference backend, support CPU/GPU
-void UseSophgoBackend();
-
-/// Set TensorRT as inference backend, only support GPU
-void UseTrtBackend();
-
-/// Set Poros backend as inference backend, support CPU/GPU
-void UsePorosBackend();
-
-/// Set OpenVINO as inference backend, only support CPU
-void UseOpenVINOBackend();
-
-/// Set Paddle Lite as inference backend, only support arm cpu
-void UseLiteBackend();
-
-/// Wrapper function of UseLiteBackend()
-void UsePaddleLiteBackend() { return UseLiteBackend(); }
-
-/// Set mkldnn switch while using Paddle Inference as inference backend
-void SetPaddleMKLDNN(bool pd_mkldnn = true);
-
-/*
-* @brief If TensorRT backend is used, EnablePaddleToTrt will change to use Paddle Inference backend, and use its integrated TensorRT instead.
-*/
-void EnablePaddleToTrt();
-
-/**
-* @brief Delete pass by name while using Paddle Inference as inference backend, this can be called multiple times to delete a set of passes
-*/
-void DeletePaddleBackendPass(const std::string& delete_pass_name);
-
-/**
-* @brief Enable print debug information while using Paddle Inference as inference backend, the backend disable the debug information by default
-*/
-void EnablePaddleLogInfo();
-
-/**
-* @brief Disable print debug information while using Paddle Inference as inference backend
-*/
-void DisablePaddleLogInfo();
-
-/**
-* @brief Set shape cache size while using Paddle Inference with mkldnn, by default it will cache all the difference shape
-*/
-void SetPaddleMKLDNNCacheSize(int size);
-
-/**
-* @brief Set device name for OpenVINO, default 'CPU', can also be 'AUTO', 'GPU', 'GPU.1'....
-*/
-void SetOpenVINODevice(const std::string& name = "CPU");
-
-/**
-* @brief Set shape info for OpenVINO
-*/
-void SetOpenVINOShapeInfo(
-const std::map<std::string, std::vector<int64_t>>& shape_info) {
-ov_shape_infos = shape_info;
-}
-
-/**
-* @brief While use OpenVINO backend with intel GPU, use this interface to specify operators run on CPU
-*/
-void SetOpenVINOCpuOperators(const std::vector<std::string>& operators) {
-ov_cpu_operators = operators;
-}
-
-/**
-* @brief Set optimzed model dir for Paddle Lite backend.
-*/
-void SetLiteOptimizedModelDir(const std::string& optimized_model_dir);
-
-/**
-* @brief Set subgraph partition path for Paddle Lite backend.
-*/
-void SetLiteSubgraphPartitionPath(
-const std::string& nnadapter_subgraph_partition_config_path);
-
-/**
-* @brief Set subgraph partition path for Paddle Lite backend.
-*/
-void SetLiteSubgraphPartitionConfigBuffer(
-const std::string& nnadapter_subgraph_partition_config_buffer);
-
-/**
-* @brief Set device name for Paddle Lite backend.
-*/
-void SetLiteDeviceNames(
-const std::vector<std::string>& nnadapter_device_names);
-
-/**
-* @brief Set context properties for Paddle Lite backend.
-*/
-void SetLiteContextProperties(
-const std::string& nnadapter_context_properties);
-
-/**
-* @brief Set model cache dir for Paddle Lite backend.
-*/
-void SetLiteModelCacheDir(
-const std::string& nnadapter_model_cache_dir);
-
-/**
-* @brief Set dynamic shape info for Paddle Lite backend.
-*/
-void SetLiteDynamicShapeInfo(
-const std::map<std::string, std::vector<std::vector<int64_t>>>&
-nnadapter_dynamic_shape_info);
-
-/**
-* @brief Set mixed precision quantization config path for Paddle Lite backend.
-*/
-void SetLiteMixedPrecisionQuantizationConfigPath(
-const std::string& nnadapter_mixed_precision_quantization_config_path);
-
-/**
-* @brief enable half precision while use paddle lite backend
-*/
-void EnableLiteFP16();
-
-/**
-* @brief disable half precision, change to full precision(float32)
-*/
-void DisableLiteFP16();
-
-/**
-* @brief enable int8 precision while use paddle lite backend
-*/
-void EnableLiteInt8();
-
-/**
-* @brief disable int8 precision, change to full precision(float32)
-*/
-void DisableLiteInt8();
-
-/**
-* @brief Set power mode while using Paddle Lite as inference backend, mode(0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND, 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW, refer [paddle lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details)
-*/
-void SetLitePowerMode(LitePowerMode mode);
-
-/** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend
-*
-* \param[in] input_name The name of input for the model which is dynamic shape
-* \param[in] min_shape The minimal shape for the input tensor
-* \param[in] opt_shape The optimized shape for the input tensor, just set the most common shape, if set as default value, it will keep same with min_shape
-* \param[in] max_shape The maximum shape for the input tensor, if set as default value, it will keep same with min_shape
-*/
-void SetTrtInputShape(
-const std::string& input_name, const std::vector<int32_t>& min_shape,
-const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
-const std::vector<int32_t>& max_shape = std::vector<int32_t>());
-
-/// Set max_workspace_size for TensorRT, default 1<<30
-void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
-
-/// Set max_batch_size for TensorRT, default 32
-void SetTrtMaxBatchSize(size_t max_batch_size);
-
-/**
-* @brief Enable FP16 inference while using TensorRT backend. Notice: not all the GPU device support FP16, on those device doesn't support FP16, FastDeploy will fallback to FP32 automaticly
-*/
-void EnableTrtFP16();
-
-/// Disable FP16 inference while using TensorRT backend
-void DisableTrtFP16();
-
-/**
-* @brief Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again
-*/
-void SetTrtCacheFile(const std::string& cache_file_path);
-
-/**
-* @brief Enable pinned memory. Pinned memory can be utilized to speedup the data transfer between CPU and GPU. Currently it's only suppurted in TRT backend and Paddle Inference backend.
-*/
-void EnablePinnedMemory();
-
-/**
-* @brief Disable pinned memory
-*/
-void DisablePinnedMemory();
-
-/**
-* @brief Enable to collect shape in paddle trt backend
-*/
-void EnablePaddleTrtCollectShape();
-
-/**
-* @brief Disable to collect shape in paddle trt backend
-*/
-void DisablePaddleTrtCollectShape();
-
-/**
-* @brief Prevent ops running in paddle trt backend
-*/
-void DisablePaddleTrtOPs(const std::vector<std::string>& ops);
-
-/*
-* @brief Set number of streams by the OpenVINO backends
-*/
-void SetOpenVINOStreams(int num_streams);
-
-/** \Use Graphcore IPU to inference.
-*
-* \param[in] device_num the number of IPUs.
-* \param[in] micro_batch_size the batch size in the graph, only work when graph has no batch shape info.
-* \param[in] enable_pipelining enable pipelining.
-* \param[in] batches_per_step the number of batches per run in pipelining.
-*/
-void UseIpu(int device_num = 1, int micro_batch_size = 1,
-bool enable_pipelining = false, int batches_per_step = 1);
-
-/** \brief Set IPU config.
-*
-* \param[in] enable_fp16 enable fp16.
-* \param[in] replica_num the number of graph replication.
-* \param[in] available_memory_proportion the available memory proportion for matmul/conv.
-* \param[in] enable_half_partial enable fp16 partial for matmul, only work with fp16.
-*/
-void SetIpuConfig(bool enable_fp16 = false, int replica_num = 1,
-float available_memory_proportion = 1.0,
-bool enable_half_partial = false);
-
-Backend backend = Backend::UNKNOWN;
-// for cpu inference and preprocess
-// default will let the backend choose their own default value
-int cpu_thread_num = -1;
-int device_id = 0;
-
-Device device = Device::CPU;
-
-void* external_stream_ = nullptr;
-
-bool enable_pinned_memory = false;
-
-// ======Only for ORT Backend========
-// -1 means use default value by ort
-// 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3:
-// ORT_ENABLE_ALL
-int ort_graph_opt_level = -1;
-int ort_inter_op_num_threads = -1;
-// 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
-int ort_execution_mode = -1;
-
-// ======Only for Paddle Backend=====
-bool pd_enable_mkldnn = true;
-bool pd_enable_log_info = false;
-bool pd_enable_trt = false;
-bool pd_collect_shape = false;
-int pd_mkldnn_cache_size = 1;
-std::vector<std::string> pd_delete_pass_names;
-
-// ======Only for Paddle IPU Backend =======
-int ipu_device_num = 1;
-int ipu_micro_batch_size = 1;
-bool ipu_enable_pipelining = false;
-int ipu_batches_per_step = 1;
-bool ipu_enable_fp16 = false;
-int ipu_replica_num = 1;
-float ipu_available_memory_proportion = 1.0;
-bool ipu_enable_half_partial = false;
-
-// ======Only for Paddle Lite Backend=====
-// 0: LITE_POWER_HIGH 1: LITE_POWER_LOW 2: LITE_POWER_FULL
-// 3: LITE_POWER_NO_BIND 4: LITE_POWER_RAND_HIGH
-// 5: LITE_POWER_RAND_LOW
-LitePowerMode lite_power_mode = LitePowerMode::LITE_POWER_NO_BIND;
-// enable int8 or not
-bool lite_enable_int8 = false;
-// enable fp16 or not
-bool lite_enable_fp16 = false;
-// optimized model dir for CxxConfig
-std::string lite_optimized_model_dir = "";
-std::string lite_nnadapter_subgraph_partition_config_path = "";
-// and other nnadapter settings for CxxConfig
-std::string lite_nnadapter_subgraph_partition_config_buffer = "";
-std::string lite_nnadapter_context_properties = "";
-std::string lite_nnadapter_model_cache_dir = "";
-std::string lite_nnadapter_mixed_precision_quantization_config_path = "";
-std::map<std::string, std::vector<std::vector<int64_t>>>
-lite_nnadapter_dynamic_shape_info = {{"", {{0}}}};
-std::vector<std::string> lite_nnadapter_device_names = {};
-
-bool enable_timvx = false;
-bool enable_ascend = false;
-bool enable_kunlunxin = false;
-
-// ======Only for Trt Backend=======
-std::map<std::string, std::vector<int32_t>> trt_max_shape;
-std::map<std::string, std::vector<int32_t>> trt_min_shape;
-std::map<std::string, std::vector<int32_t>> trt_opt_shape;
-std::string trt_serialize_file = "";
-bool trt_enable_fp16 = false;
-bool trt_enable_int8 = false;
-size_t trt_max_batch_size = 1;
-size_t trt_max_workspace_size = 1 << 30;
-// ======Only for PaddleTrt Backend=======
-std::vector<std::string> trt_disabled_ops_{};
-
-// ======Only for Poros Backend=======
-bool is_dynamic = false;
-bool long_to_int = true;
-bool use_nvidia_tf32 = false;
-int unconst_ops_thres = -1;
-std::string poros_file = "";
-
-// ======Only for OpenVINO Backend=======
-int ov_num_streams = 0;
-std::string openvino_device = "CPU";
-std::map<std::string, std::vector<int64_t>> ov_shape_infos;
-std::vector<std::string> ov_cpu_operators;
-
-// ======Only for RKNPU2 Backend=======
-fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ =
-fastdeploy::rknpu2::CpuName::RK3588;
-fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ =
-fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
-
-// ======Only for KunlunXin XPU Backend=======
-int kunlunxin_l3_workspace_size = 0xfffc00;
-bool kunlunxin_locked = false;
-bool kunlunxin_autotune = true;
-std::string kunlunxin_autotune_file = "";
-std::string kunlunxin_precision = "int16";
-bool kunlunxin_adaptive_seqlen = false;
-bool kunlunxin_enable_multi_stream = false;
-
-std::string model_file = ""; // Path of model file
-std::string params_file = ""; // Path of parameters file, can be empty
-// format of input model
-ModelFormat model_format = ModelFormat::AUTOREC;
-
-std::string model_buffer_ = "";
-std::string params_buffer_ = "";
-size_t model_buffer_size_ = 0;
-size_t params_buffer_size_ = 0;
-bool model_from_memory_ = false;
-};
-
-/*! @brief Runtime object used to inference the loaded model on different devices
-*/
-struct FASTDEPLOY_DECL Runtime {
-public:
-/// Intialize a Runtime object with RuntimeOption
-bool Init(const RuntimeOption& _option);
-
-/** \brief Inference the model by the input data, and write to the output
-*
-* \param[in] input_tensors Notice the FDTensor::name should keep same with the model's input
-* \param[in] output_tensors Inference results
-* \return true if the inference successed, otherwise false
-*/
-bool Infer(std::vector<FDTensor>& input_tensors,
-std::vector<FDTensor>* output_tensors);
-
-/** \brief No params inference the model.
-*
-* the input and output data need to pass through the BindInputTensor and GetOutputTensor interfaces.
-*/
-bool Infer();
-
-/** \brief Compile TorchScript Module, only for Poros backend
-*
-* \param[in] prewarm_tensors Prewarm datas for compile
-* \param[in] _option Runtime option
-* \return true if compile successed, otherwise false
-*/
-bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
-const RuntimeOption& _option);
-
-/** \brief Get number of inputs
-*/
-int NumInputs() { return backend_->NumInputs(); }
-/** \brief Get number of outputs
-*/
-int NumOutputs() { return backend_->NumOutputs(); }
-/** \brief Get input information by index
-*/
-TensorInfo GetInputInfo(int index);
-/** \brief Get output information by index
-*/
-TensorInfo GetOutputInfo(int index);
-/** \brief Get all the input information
-*/
-std::vector<TensorInfo> GetInputInfos();
-/** \brief Get all the output information
-*/
-std::vector<TensorInfo> GetOutputInfos();
-/** \brief Bind FDTensor by name, no copy and share input memory
-*/
-void BindInputTensor(const std::string& name, FDTensor& input);
-/** \brief Get output FDTensor by name, no copy and share backend output memory
-*/
-FDTensor* GetOutputTensor(const std::string& name);
-
-/** \brief Clone new Runtime when multiple instances of the same model are created
-*
-* \param[in] stream CUDA Stream, defualt param is nullptr
-* \return new Runtime* by this clone
-*/
-Runtime* Clone(void* stream = nullptr, int device_id = -1);
-
-RuntimeOption option;
-
-private:
-void CreateOrtBackend();
-void CreatePaddleBackend();
-void CreateTrtBackend();
-void CreateOpenVINOBackend();
-void CreateLiteBackend();
-void CreateRKNPU2Backend();
-void CreateSophgoNPUBackend();
-std::unique_ptr<BaseBackend> backend_;
-std::vector<FDTensor> input_tensors_;
-std::vector<FDTensor> output_tensors_;
-};
-} // namespace fastdeploy
fastdeploy/runtime/enum_variables.cc (new file, 85 lines)
@@ -0,0 +1,85 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/runtime/enum_variables.h"
+
+namespace fastdeploy {
+std::ostream& operator<<(std::ostream& out, const Backend& backend) {
+if (backend == Backend::ORT) {
+out << "Backend::ORT";
+} else if (backend == Backend::TRT) {
+out << "Backend::TRT";
+} else if (backend == Backend::PDINFER) {
+out << "Backend::PDINFER";
+} else if (backend == Backend::OPENVINO) {
+out << "Backend::OPENVINO";
+} else if (backend == Backend::RKNPU2) {
+out << "Backend::RKNPU2";
+} else if (backend == Backend::SOPHGOTPU) {
+out << "Backend::SOPHGOTPU";
+} else if (backend == Backend::POROS) {
+out << "Backend::POROS";
+} else if (backend == Backend::LITE) {
+out << "Backend::PDLITE";
+} else {
+out << "UNKNOWN-Backend";
+}
+return out;
+}
+
+std::ostream& operator<<(std::ostream& out, const Device& d) {
+switch (d) {
+case Device::CPU:
+out << "Device::CPU";
+break;
+case Device::GPU:
+out << "Device::GPU";
+break;
+case Device::RKNPU:
+out << "Device::RKNPU";
+break;
+case Device::SOPHGOTPUD:
+out << "Device::SOPHGOTPUD";
+break;
+case Device::TIMVX:
+out << "Device::TIMVX";
+break;
+case Device::KUNLUNXIN:
+out << "Device::KUNLUNXIN";
+break;
+case Device::ASCEND:
+out << "Device::ASCEND";
+break;
+default:
+out << "Device::UNKOWN";
+}
+return out;
+}
+
+std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
+if (format == ModelFormat::PADDLE) {
+out << "ModelFormat::PADDLE";
+} else if (format == ModelFormat::ONNX) {
+out << "ModelFormat::ONNX";
+} else if (format == ModelFormat::RKNN) {
+out << "ModelFormat::RKNN";
+} else if (format == ModelFormat::SOPHGO) {
+out << "ModelFormat::SOPHGO";
+} else if (format == ModelFormat::TORCHSCRIPT) {
+out << "ModelFormat::TORCHSCRIPT";
+}
+out << "UNKNOWN-ModelFormat";
+return out;
+}
+} // namespace fastdeploy
fastdeploy/runtime/enum_variables.h (new file, 79 lines)
@@ -0,0 +1,79 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*! \file enum_variables.h
    \brief A brief file description.

    More details
 */

#pragma once
#include "fastdeploy/utils/utils.h"
#include <ostream>
#include <map>

namespace fastdeploy {

/*! Inference backend supported in FastDeploy */
enum Backend {
  UNKNOWN,    ///< Unknown inference backend
  ORT,        ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
  TRT,        ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
  PDINFER,    ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
  POROS,      ///< Poros, support TorchScript format model, CPU / Nvidia GPU
  OPENVINO,   ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
  LITE,       ///< Paddle Lite, support Paddle format model, ARM CPU only
  RKNPU2,     ///< RKNPU2, support RKNN format model, Rockchip NPU only
  SOPHGOTPU,  ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only
};

enum FASTDEPLOY_DECL Device {
  CPU,
  GPU,
  RKNPU,
  IPU,
  TIMVX,
  KUNLUNXIN,
  ASCEND,
  SOPHGOTPUD
};

/*! Deep learning model format */
enum ModelFormat {
  AUTOREC,      ///< Auto recognize the model format by model file name
  PADDLE,       ///< Model with paddlepaddle format
  ONNX,         ///< Model with ONNX format
  RKNN,         ///< Model with RKNN format
  TORCHSCRIPT,  ///< Model with TorchScript format
  SOPHGO,       ///< Model with SOPHGO format
};

/// Describe all the supported backends for specified model format
static std::map<ModelFormat, std::vector<Backend>> s_default_backends_cfg = {
    {ModelFormat::PADDLE, {Backend::PDINFER, Backend::LITE,
                           Backend::ORT, Backend::OPENVINO, Backend::TRT}},
    {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}},
    {ModelFormat::RKNN, {Backend::RKNPU2}},
    {ModelFormat::TORCHSCRIPT, {Backend::POROS}},
    {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
};

FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Backend& b);

FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out, const Device& d);

FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
                                         const ModelFormat& f);

}  // namespace fastdeploy
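Not part of the diff: a minimal sketch of how the enums, the stream operators, and the default-backend table above can be consumed, for example to print the backend priority list for Paddle models. The main() wrapper is illustrative only.

#include "fastdeploy/runtime/enum_variables.h"
#include <iostream>

int main() {
  // Print the default backend priority for Paddle format models, relying on
  // the operator<< overloads declared in enum_variables.h.
  for (const auto& b :
       fastdeploy::s_default_backends_cfg[fastdeploy::ModelFormat::PADDLE]) {
    std::cout << b << std::endl;  // e.g. Backend::PDINFER, Backend::PDLITE, ...
  }
  return 0;
}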
492  fastdeploy/runtime/runtime.cc  Normal file
@@ -0,0 +1,492 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/runtime/runtime.h"

#include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/utils/utils.h"

#ifdef ENABLE_ORT_BACKEND
#include "fastdeploy/backends/ort/ort_backend.h"
#endif

#ifdef ENABLE_TRT_BACKEND
#include "fastdeploy/backends/tensorrt/trt_backend.h"
#endif

#ifdef ENABLE_PADDLE_BACKEND
#include "fastdeploy/backends/paddle/paddle_backend.h"
#endif

#ifdef ENABLE_POROS_BACKEND
#include "fastdeploy/backends/poros/poros_backend.h"
#endif

#ifdef ENABLE_OPENVINO_BACKEND
#include "fastdeploy/backends/openvino/ov_backend.h"
#endif

#ifdef ENABLE_LITE_BACKEND
#include "fastdeploy/backends/lite/lite_backend.h"
#endif

#ifdef ENABLE_RKNPU2_BACKEND
#include "fastdeploy/backends/rknpu2/rknpu2_backend.h"
#endif

#ifdef ENABLE_SOPHGO_BACKEND
#include "fastdeploy/backends/sophgo/sophgo_backend.h"
#endif

namespace fastdeploy {

bool Runtime::Init(const RuntimeOption& _option) {
  option = _option;
  // Choose default backend by model format
  if (option.backend == Backend::UNKNOWN) {
    auto iter = s_default_backends_cfg.find(option.model_format);
    if (iter == s_default_backends_cfg.end()) {
      FDERROR << "Cannot find a default backend for model format: "
              << option.model_format
              << ", please define the inference backend in RuntimeOption."
              << std::endl;
      return false;
    }
    for (const auto& b : iter->second) {
      if (IsBackendAvailable(b)) {
        option.backend = b;
        FDINFO << "FastDeploy will choose " << b
               << " to run inference on this model." << std::endl;
        break;  // take the first available backend in priority order
      }
    }
    if (option.backend == Backend::UNKNOWN) {
      FDERROR << "Cannot find an available backend for model format: "
              << option.model_format << "." << std::endl;
      return false;
    }
  }

  if (option.backend == Backend::ORT) {
    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
             "Backend::ORT only supports Device::CPU/Device::GPU.");
    CreateOrtBackend();
    FDINFO << "Runtime initialized with Backend::ORT in " << option.device
           << "." << std::endl;
  } else if (option.backend == Backend::TRT) {
    FDASSERT(option.device == Device::GPU,
             "Backend::TRT only supports Device::GPU.");
    CreateTrtBackend();
    FDINFO << "Runtime initialized with Backend::TRT in " << option.device
           << "." << std::endl;
  } else if (option.backend == Backend::PDINFER) {
    FDASSERT(
        option.device == Device::CPU || option.device == Device::GPU ||
            option.device == Device::IPU,
        "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
    FDASSERT(
        option.model_format == ModelFormat::PADDLE,
        "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
    CreatePaddleBackend();
    FDINFO << "Runtime initialized with Backend::PDINFER in " << option.device
           << "." << std::endl;
  } else if (option.backend == Backend::POROS) {
    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
             "Backend::POROS only supports Device::CPU/Device::GPU.");
    FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
             "Backend::POROS only supports model format of "
             "ModelFormat::TORCHSCRIPT.");
    FDINFO << "Runtime initialized with Backend::POROS in " << option.device
           << "." << std::endl;
    return true;
  } else if (option.backend == Backend::OPENVINO) {
    FDASSERT(option.device == Device::CPU,
             "Backend::OPENVINO only supports Device::CPU.");
    CreateOpenVINOBackend();
    FDINFO << "Runtime initialized with Backend::OPENVINO in " << option.device
           << "." << std::endl;
  } else if (option.backend == Backend::LITE) {
    FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX ||
                 option.device == Device::KUNLUNXIN ||
                 option.device == Device::ASCEND,
             "Backend::LITE only supports "
             "Device::CPU/Device::TIMVX/Device::KUNLUNXIN/Device::ASCEND.");
    CreateLiteBackend();
    FDINFO << "Runtime initialized with Backend::LITE in " << option.device
           << "." << std::endl;
  } else if (option.backend == Backend::RKNPU2) {
    FDASSERT(option.device == Device::RKNPU,
             "Backend::RKNPU2 only supports Device::RKNPU.");
    CreateRKNPU2Backend();

    FDINFO << "Runtime initialized with Backend::RKNPU2 in " << option.device
           << "." << std::endl;
  } else if (option.backend == Backend::SOPHGOTPU) {
    FDASSERT(option.device == Device::SOPHGOTPUD,
             "Backend::SOPHGOTPU only supports Device::SOPHGOTPUD.");
    CreateSophgoNPUBackend();

    FDINFO << "Runtime initialized with Backend::SOPHGOTPU in " << option.device
           << "." << std::endl;
  } else {
    FDERROR << "Runtime only supports "
               "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
               "backend now."
            << std::endl;
    return false;
  }
  return true;
}

TensorInfo Runtime::GetInputInfo(int index) {
  return backend_->GetInputInfo(index);
}

TensorInfo Runtime::GetOutputInfo(int index) {
  return backend_->GetOutputInfo(index);
}

std::vector<TensorInfo> Runtime::GetInputInfos() {
  return backend_->GetInputInfos();
}

std::vector<TensorInfo> Runtime::GetOutputInfos() {
  return backend_->GetOutputInfos();
}

bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
                    std::vector<FDTensor>* output_tensors) {
  for (auto& tensor : input_tensors) {
    FDASSERT(tensor.device_id < 0 || tensor.device_id == option.device_id,
             "Device id of input tensor(%d) and runtime(%d) are not same.",
             tensor.device_id, option.device_id);
  }
  return backend_->Infer(input_tensors, output_tensors);
}

bool Runtime::Infer() {
  bool result = backend_->Infer(input_tensors_, &output_tensors_, false);
  for (auto& tensor : output_tensors_) {
    tensor.device_id = option.device_id;
  }
  return result;
}

void Runtime::BindInputTensor(const std::string& name, FDTensor& input) {
  bool is_exist = false;
  for (auto& t : input_tensors_) {
    if (t.name == name) {
      is_exist = true;
      t.SetExternalData(input.shape, input.dtype, input.MutableData(),
                        input.device, input.device_id);
      break;
    }
  }
  if (!is_exist) {
    FDTensor new_tensor(name);
    new_tensor.SetExternalData(input.shape, input.dtype, input.MutableData(),
                               input.device, input.device_id);
    input_tensors_.emplace_back(std::move(new_tensor));
  }
}

FDTensor* Runtime::GetOutputTensor(const std::string& name) {
  for (auto& t : output_tensors_) {
    if (t.name == name) {
      return &t;
    }
  }
  FDWARNING << "The output name [" << name << "] doesn't exist." << std::endl;
  return nullptr;
}

void Runtime::CreatePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
  auto pd_option = PaddleBackendOption();
  pd_option.model_file = option.model_file;
  pd_option.params_file = option.params_file;
  pd_option.enable_mkldnn = option.pd_enable_mkldnn;
  pd_option.enable_log_info = option.pd_enable_log_info;
  pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size;
  pd_option.use_gpu = (option.device == Device::GPU) ? true : false;
  pd_option.use_ipu = (option.device == Device::IPU) ? true : false;
  pd_option.gpu_id = option.device_id;
  pd_option.delete_pass_names = option.pd_delete_pass_names;
  pd_option.cpu_thread_num = option.cpu_thread_num;
  pd_option.enable_pinned_memory = option.enable_pinned_memory;
  pd_option.external_stream_ = option.external_stream_;
  pd_option.model_from_memory_ = option.model_from_memory_;
  if (pd_option.model_from_memory_) {
    pd_option.model_buffer_ = option.model_buffer_;
    pd_option.params_buffer_ = option.params_buffer_;
    pd_option.model_buffer_size_ = option.model_buffer_size_;
    pd_option.params_buffer_size_ = option.params_buffer_size_;
  }
#ifdef ENABLE_TRT_BACKEND
  if (pd_option.use_gpu && option.pd_enable_trt) {
    pd_option.enable_trt = true;
    pd_option.collect_shape = option.pd_collect_shape;
    auto trt_option = TrtBackendOption();
    trt_option.gpu_id = option.device_id;
    trt_option.enable_fp16 = option.trt_enable_fp16;
    trt_option.max_batch_size = option.trt_max_batch_size;
    trt_option.max_workspace_size = option.trt_max_workspace_size;
    trt_option.max_shape = option.trt_max_shape;
    trt_option.min_shape = option.trt_min_shape;
    trt_option.opt_shape = option.trt_opt_shape;
    trt_option.serialize_file = option.trt_serialize_file;
    trt_option.enable_pinned_memory = option.enable_pinned_memory;
    pd_option.trt_option = trt_option;
    pd_option.trt_disabled_ops_ = option.trt_disabled_ops_;
  }
#endif
#ifdef WITH_IPU
  if (pd_option.use_ipu) {
    auto ipu_option = IpuOption();
    ipu_option.ipu_device_num = option.ipu_device_num;
    ipu_option.ipu_micro_batch_size = option.ipu_micro_batch_size;
    ipu_option.ipu_enable_pipelining = option.ipu_enable_pipelining;
    ipu_option.ipu_batches_per_step = option.ipu_batches_per_step;
    ipu_option.ipu_enable_fp16 = option.ipu_enable_fp16;
    ipu_option.ipu_replica_num = option.ipu_replica_num;
    ipu_option.ipu_available_memory_proportion =
        option.ipu_available_memory_proportion;
    ipu_option.ipu_enable_half_partial = option.ipu_enable_half_partial;
    pd_option.ipu_option = ipu_option;
  }
#endif
  FDASSERT(option.model_format == ModelFormat::PADDLE,
           "PaddleBackend only supports model format of ModelFormat::PADDLE.");
  backend_ = utils::make_unique<PaddleBackend>();
  auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
  if (pd_option.model_from_memory_) {
    FDASSERT(casted_backend->InitFromPaddle(option.model_buffer_,
                                            option.params_buffer_, pd_option),
             "Load model from Paddle failed while initializing PaddleBackend.");
  } else {
    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
                                            option.params_file, pd_option),
             "Load model from Paddle failed while initializing PaddleBackend.");
  }
#else
  FDASSERT(false,
           "PaddleBackend is not available, please compile with "
           "ENABLE_PADDLE_BACKEND=ON.");
#endif
}

void Runtime::CreateOpenVINOBackend() {
#ifdef ENABLE_OPENVINO_BACKEND
  auto ov_option = OpenVINOBackendOption();
  ov_option.cpu_thread_num = option.cpu_thread_num;
  ov_option.device = option.openvino_device;
  ov_option.shape_infos = option.ov_shape_infos;
  ov_option.num_streams = option.ov_num_streams;
  for (const auto& op : option.ov_cpu_operators) {
    ov_option.cpu_operators.insert(op);
  }
  FDASSERT(option.model_format == ModelFormat::PADDLE ||
               option.model_format == ModelFormat::ONNX,
           "OpenVINOBackend only supports model format of ModelFormat::PADDLE "
           "/ ModelFormat::ONNX.");
  backend_ = utils::make_unique<OpenVINOBackend>();
  auto casted_backend = dynamic_cast<OpenVINOBackend*>(backend_.get());

  if (option.model_format == ModelFormat::ONNX) {
    FDASSERT(casted_backend->InitFromOnnx(option.model_file, ov_option),
             "Load model from ONNX failed while initializing OpenVINOBackend.");
  } else {
    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
                                            option.params_file, ov_option),
             "Load model from Paddle failed while initializing "
             "OpenVINOBackend.");
  }
#else
  FDASSERT(false,
           "OpenVINOBackend is not available, please compile with "
           "ENABLE_OPENVINO_BACKEND=ON.");
#endif
}

void Runtime::CreateOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
  auto ort_option = OrtBackendOption();
  ort_option.graph_optimization_level = option.ort_graph_opt_level;
  ort_option.intra_op_num_threads = option.cpu_thread_num;
  ort_option.inter_op_num_threads = option.ort_inter_op_num_threads;
  ort_option.execution_mode = option.ort_execution_mode;
  ort_option.use_gpu = (option.device == Device::GPU) ? true : false;
  ort_option.gpu_id = option.device_id;
  ort_option.external_stream_ = option.external_stream_;

  FDASSERT(option.model_format == ModelFormat::PADDLE ||
               option.model_format == ModelFormat::ONNX,
           "OrtBackend only supports model format of ModelFormat::PADDLE / "
           "ModelFormat::ONNX.");
  backend_ = utils::make_unique<OrtBackend>();
  auto casted_backend = dynamic_cast<OrtBackend*>(backend_.get());
  if (option.model_format == ModelFormat::ONNX) {
    FDASSERT(casted_backend->InitFromOnnx(option.model_file, ort_option),
             "Load model from ONNX failed while initializing OrtBackend.");
  } else {
    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
                                            option.params_file, ort_option),
             "Load model from Paddle failed while initializing OrtBackend.");
  }
#else
  FDASSERT(false,
           "OrtBackend is not available, please compile with "
           "ENABLE_ORT_BACKEND=ON.");
#endif
}

void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
  auto trt_option = TrtBackendOption();
  trt_option.model_file = option.model_file;
  trt_option.params_file = option.params_file;
  trt_option.model_format = option.model_format;
  trt_option.gpu_id = option.device_id;
  trt_option.enable_fp16 = option.trt_enable_fp16;
  trt_option.enable_int8 = option.trt_enable_int8;
  trt_option.max_batch_size = option.trt_max_batch_size;
  trt_option.max_workspace_size = option.trt_max_workspace_size;
  trt_option.max_shape = option.trt_max_shape;
  trt_option.min_shape = option.trt_min_shape;
  trt_option.opt_shape = option.trt_opt_shape;
  trt_option.serialize_file = option.trt_serialize_file;
  trt_option.enable_pinned_memory = option.enable_pinned_memory;
  trt_option.external_stream_ = option.external_stream_;

  FDASSERT(option.model_format == ModelFormat::PADDLE ||
               option.model_format == ModelFormat::ONNX,
           "TrtBackend only supports model format of ModelFormat::PADDLE / "
           "ModelFormat::ONNX.");
  backend_ = utils::make_unique<TrtBackend>();
  auto casted_backend = dynamic_cast<TrtBackend*>(backend_.get());
  if (option.model_format == ModelFormat::ONNX) {
    FDASSERT(casted_backend->InitFromOnnx(option.model_file, trt_option),
             "Load model from ONNX failed while initializing TrtBackend.");
  } else {
    FDASSERT(casted_backend->InitFromPaddle(option.model_file,
                                            option.params_file, trt_option),
             "Load model from Paddle failed while initializing TrtBackend.");
  }
#else
  FDASSERT(false,
           "TrtBackend is not available, please compile with "
           "ENABLE_TRT_BACKEND=ON.");
#endif
}

void Runtime::CreateLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
  auto lite_option = LiteBackendOption();
  lite_option.threads = option.cpu_thread_num;
  lite_option.enable_int8 = option.lite_enable_int8;
  lite_option.enable_fp16 = option.lite_enable_fp16;
  lite_option.power_mode = static_cast<int>(option.lite_power_mode);
  lite_option.optimized_model_dir = option.lite_optimized_model_dir;
  lite_option.nnadapter_subgraph_partition_config_path =
      option.lite_nnadapter_subgraph_partition_config_path;
  lite_option.nnadapter_subgraph_partition_config_buffer =
      option.lite_nnadapter_subgraph_partition_config_buffer;
  lite_option.nnadapter_device_names = option.lite_nnadapter_device_names;
  lite_option.nnadapter_context_properties =
      option.lite_nnadapter_context_properties;
  lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir;
  lite_option.nnadapter_dynamic_shape_info =
      option.lite_nnadapter_dynamic_shape_info;
  lite_option.nnadapter_mixed_precision_quantization_config_path =
      option.lite_nnadapter_mixed_precision_quantization_config_path;
  lite_option.enable_timvx = option.enable_timvx;
  lite_option.enable_ascend = option.enable_ascend;
  lite_option.enable_kunlunxin = option.enable_kunlunxin;
  lite_option.device_id = option.device_id;
  lite_option.kunlunxin_l3_workspace_size = option.kunlunxin_l3_workspace_size;
  lite_option.kunlunxin_locked = option.kunlunxin_locked;
  lite_option.kunlunxin_autotune = option.kunlunxin_autotune;
  lite_option.kunlunxin_autotune_file = option.kunlunxin_autotune_file;
  lite_option.kunlunxin_precision = option.kunlunxin_precision;
  lite_option.kunlunxin_adaptive_seqlen = option.kunlunxin_adaptive_seqlen;
  lite_option.kunlunxin_enable_multi_stream =
      option.kunlunxin_enable_multi_stream;

  FDASSERT(option.model_format == ModelFormat::PADDLE,
           "LiteBackend only supports model format of ModelFormat::PADDLE.");
  backend_ = utils::make_unique<LiteBackend>();
  auto casted_backend = dynamic_cast<LiteBackend*>(backend_.get());
  FDASSERT(casted_backend->InitFromPaddle(option.model_file, option.params_file,
                                          lite_option),
           "Load model from nb file failed while initializing LiteBackend.");
#else
  FDASSERT(false,
           "LiteBackend is not available, please compile with "
           "ENABLE_LITE_BACKEND=ON.");
#endif
}

void Runtime::CreateRKNPU2Backend() {
#ifdef ENABLE_RKNPU2_BACKEND
  auto rknpu2_option = RKNPU2BackendOption();
  rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
  rknpu2_option.core_mask = option.rknpu2_core_mask_;
  FDASSERT(option.model_format == ModelFormat::RKNN,
           "RKNPU2Backend only supports model format of ModelFormat::RKNN.");
  backend_ = utils::make_unique<RKNPU2Backend>();
  auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
  FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
           "Load model from RKNN file failed while initializing RKNPU2Backend.");
#else
  FDASSERT(false,
           "RKNPU2Backend is not available, please compile with "
           "ENABLE_RKNPU2_BACKEND=ON.");
#endif
}

void Runtime::CreateSophgoNPUBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
  auto sophgo_option = SophgoBackendOption();
  FDASSERT(option.model_format == ModelFormat::SOPHGO,
           "SophgoBackend only supports model format of ModelFormat::SOPHGO.");
  backend_ = utils::make_unique<SophgoBackend>();
  auto casted_backend = dynamic_cast<SophgoBackend*>(backend_.get());
  FDASSERT(casted_backend->InitFromSophgo(option.model_file, sophgo_option),
           "Load model from SOPHGO file failed while initializing "
           "SophgoBackend.");
#else
  FDASSERT(false,
           "SophgoBackend is not available, please compile with "
           "ENABLE_SOPHGO_BACKEND=ON.");
#endif
}

Runtime* Runtime::Clone(void* stream, int device_id) {
  Runtime* runtime = new Runtime();
  if (option.backend != Backend::OPENVINO &&
      option.backend != Backend::PDINFER && option.backend != Backend::TRT) {
    runtime->Init(option);
    FDWARNING << "Only OpenVINO/Paddle Inference/TensorRT support cloning an "
                 "engine to reduce CPU/GPU memory usage now. For "
              << option.backend
              << ", FastDeploy will create a new engine which will not share "
                 "memory with the current runtime."
              << std::endl;
    return runtime;
  }
  FDINFO << "Runtime Clone with " << option.backend << " in " << option.device
         << "." << std::endl;
  runtime->option = option;
  runtime->backend_ = backend_->Clone(stream, device_id);
  return runtime;
}

}  // namespace fastdeploy
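Not part of the diff: a sketch, based only on the APIs above, of the no-argument Infer() path, which relies on BindInputTensor/GetOutputTensor to share memory instead of copying. The tensor names are hypothetical and depend on the model.

// Assumes `runtime` was already initialized via Runtime::Init and `image` is
// a preprocessed FDTensor whose name matches the model's input name.
fastdeploy::FDTensor image;
// ... fill `image` with preprocessed data via the FDTensor API ...
runtime.BindInputTensor("x", image);  // shares memory with `image`, no copy
if (!runtime.Infer()) {
  // handle inference failure
}
// The returned pointer aliases backend-owned memory; copy the data out if it
// must outlive the next Infer() call (assumption, not enforced by the API).
fastdeploy::FDTensor* out = runtime.GetOutputTensor("output_0");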
109  fastdeploy/runtime/runtime.h  Executable file
@@ -0,0 +1,109 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*! \file runtime.h
    \brief A brief file description.

    More details
 */

#pragma once
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "fastdeploy/runtime/runtime_option.h"
#include "fastdeploy/utils/perf.h"

/** \brief All C++ FastDeploy APIs are defined inside this namespace
 *
 */
namespace fastdeploy {

/*! @brief Runtime object used to run inference on the loaded model with different devices
 */
struct FASTDEPLOY_DECL Runtime {
 public:
  /// Initialize a Runtime object with RuntimeOption
  bool Init(const RuntimeOption& _option);

  /** \brief Run inference on the model with the input data, and write the results to the output
   *
   * \param[in] input_tensors Notice the FDTensor::name should be kept the same as the model's input
   * \param[in] output_tensors Inference results
   * \return true if the inference succeeded, otherwise false
   */
  bool Infer(std::vector<FDTensor>& input_tensors,
             std::vector<FDTensor>* output_tensors);

  /** \brief Run inference on the model without passing input/output parameters.
   *
   * The input and output data need to go through the BindInputTensor and GetOutputTensor interfaces.
   */
  bool Infer();

  /** \brief Compile TorchScript Module, only for Poros backend
   *
   * \param[in] prewarm_tensors Prewarm data for compilation
   * \param[in] _option Runtime option
   * \return true if the compilation succeeded, otherwise false
   */
  bool Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
               const RuntimeOption& _option);

  /** \brief Get number of inputs
   */
  int NumInputs() { return backend_->NumInputs(); }
  /** \brief Get number of outputs
   */
  int NumOutputs() { return backend_->NumOutputs(); }
  /** \brief Get input information by index
   */
  TensorInfo GetInputInfo(int index);
  /** \brief Get output information by index
   */
  TensorInfo GetOutputInfo(int index);
  /** \brief Get all the input information
   */
  std::vector<TensorInfo> GetInputInfos();
  /** \brief Get all the output information
   */
  std::vector<TensorInfo> GetOutputInfos();
  /** \brief Bind FDTensor by name, no copy and share input memory
   */
  void BindInputTensor(const std::string& name, FDTensor& input);
  /** \brief Get output FDTensor by name, no copy and share backend output memory
   */
  FDTensor* GetOutputTensor(const std::string& name);

  /** \brief Clone a new Runtime when multiple instances of the same model are created
   *
   * \param[in] stream CUDA Stream, default param is nullptr
   * \return new Runtime* by this clone
   */
  Runtime* Clone(void* stream = nullptr, int device_id = -1);

  RuntimeOption option;

 private:
  void CreateOrtBackend();
  void CreatePaddleBackend();
  void CreateTrtBackend();
  void CreateOpenVINOBackend();
  void CreateLiteBackend();
  void CreateRKNPU2Backend();
  void CreateSophgoNPUBackend();
  std::unique_ptr<BaseBackend> backend_;
  std::vector<FDTensor> input_tensors_;
  std::vector<FDTensor> output_tensors_;
};
}  // namespace fastdeploy
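Not part of the diff: a minimal end-to-end usage sketch of the Runtime API declared above. The model paths and the choice of backend are placeholders, and the preprocessing step is elided.

#include "fastdeploy/runtime/runtime.h"
#include <vector>

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // hypothetical paths
  option.UseCpu();
  option.UseOrtBackend();  // any available backend works; ORT chosen for the sketch

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }

  // Prepare one input tensor per model input; names must match the model.
  std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
  inputs[0].name = runtime.GetInputInfo(0).name;
  // ... fill inputs[0] with preprocessed data via the FDTensor API ...

  std::vector<fastdeploy::FDTensor> outputs;
  if (!runtime.Infer(inputs, &outputs)) {
    return -1;
  }
  return 0;
}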
515  fastdeploy/runtime/runtime_option.cc  Normal file
@@ -0,0 +1,515 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy/runtime/runtime.h"
#include "fastdeploy/utils/unique_ptr.h"
#include "fastdeploy/utils/utils.h"

namespace fastdeploy {

std::vector<Backend> GetAvailableBackends() {
  std::vector<Backend> backends;
#ifdef ENABLE_ORT_BACKEND
  backends.push_back(Backend::ORT);
#endif
#ifdef ENABLE_TRT_BACKEND
  backends.push_back(Backend::TRT);
#endif
#ifdef ENABLE_PADDLE_BACKEND
  backends.push_back(Backend::PDINFER);
#endif
#ifdef ENABLE_POROS_BACKEND
  backends.push_back(Backend::POROS);
#endif
#ifdef ENABLE_OPENVINO_BACKEND
  backends.push_back(Backend::OPENVINO);
#endif
#ifdef ENABLE_LITE_BACKEND
  backends.push_back(Backend::LITE);
#endif
#ifdef ENABLE_RKNPU2_BACKEND
  backends.push_back(Backend::RKNPU2);
#endif
#ifdef ENABLE_SOPHGO_BACKEND
  backends.push_back(Backend::SOPHGOTPU);
#endif
  return backends;
}

bool IsBackendAvailable(const Backend& backend) {
  std::vector<Backend> backends = GetAvailableBackends();
  for (size_t i = 0; i < backends.size(); ++i) {
    if (backend == backends[i]) {
      return true;
    }
  }
  return false;
}

bool CheckModelFormat(const std::string& model_file,
                      const ModelFormat& model_format) {
  if (model_format == ModelFormat::PADDLE) {
    if (model_file.size() < 8 ||
        model_file.substr(model_file.size() - 8, 8) != ".pdmodel") {
      FDERROR << "With model format of ModelFormat::PADDLE, the model file "
                 "should end with `.pdmodel`, but now it's "
              << model_file << std::endl;
      return false;
    }
  } else if (model_format == ModelFormat::ONNX) {
    if (model_file.size() < 5 ||
        model_file.substr(model_file.size() - 5, 5) != ".onnx") {
      FDERROR << "With model format of ModelFormat::ONNX, the model file "
                 "should end with `.onnx`, but now it's "
              << model_file << std::endl;
      return false;
    }
  } else if (model_format == ModelFormat::RKNN) {
    if (model_file.size() < 5 ||
        model_file.substr(model_file.size() - 5, 5) != ".rknn") {
      FDERROR << "With model format of ModelFormat::RKNN, the model file "
                 "should end with `.rknn`, but now it's "
              << model_file << std::endl;
      return false;
    }
  } else if (model_format == ModelFormat::TORCHSCRIPT) {
    if (model_file.size() < 3 ||
        model_file.substr(model_file.size() - 3, 3) != ".pt") {
      FDERROR
          << "With model format of ModelFormat::TORCHSCRIPT, the model file "
             "should end with `.pt`, but now it's "
          << model_file << std::endl;
      return false;
    }
  } else if (model_format == ModelFormat::SOPHGO) {
    if (model_file.size() < 7 ||
        model_file.substr(model_file.size() - 7, 7) != ".bmodel") {
      FDERROR << "With model format of ModelFormat::SOPHGO, the model file "
                 "should end with `.bmodel`, but now it's "
              << model_file << std::endl;
      return false;
    }
  } else {
    FDERROR
        << "Only support model format with frontend ModelFormat::PADDLE / "
           "ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT."
        << std::endl;
    return false;
  }
  return true;
}

ModelFormat GuessModelFormat(const std::string& model_file) {
  if (model_file.size() > 8 &&
      model_file.substr(model_file.size() - 8, 8) == ".pdmodel") {
    FDINFO << "Model Format: PaddlePaddle." << std::endl;
    return ModelFormat::PADDLE;
  } else if (model_file.size() > 5 &&
             model_file.substr(model_file.size() - 5, 5) == ".onnx") {
    FDINFO << "Model Format: ONNX." << std::endl;
    return ModelFormat::ONNX;
  } else if (model_file.size() > 3 &&
             model_file.substr(model_file.size() - 3, 3) == ".pt") {
    FDINFO << "Model Format: Torchscript." << std::endl;
    return ModelFormat::TORCHSCRIPT;
  } else if (model_file.size() > 5 &&
             model_file.substr(model_file.size() - 5, 5) == ".rknn") {
    FDINFO << "Model Format: RKNN." << std::endl;
    return ModelFormat::RKNN;
  } else if (model_file.size() > 7 &&
             model_file.substr(model_file.size() - 7, 7) == ".bmodel") {
    FDINFO << "Model Format: SOPHGO." << std::endl;
    return ModelFormat::SOPHGO;
  }

  FDERROR << "Cannot guess which model format you are using, please set "
             "RuntimeOption::model_format manually."
          << std::endl;
  return ModelFormat::PADDLE;
}

void RuntimeOption::SetModelPath(const std::string& model_path,
                                 const std::string& params_path,
                                 const ModelFormat& format) {
  if (format == ModelFormat::PADDLE) {
    model_file = model_path;
    params_file = params_path;
    model_format = ModelFormat::PADDLE;
  } else if (format == ModelFormat::ONNX) {
    model_file = model_path;
    model_format = ModelFormat::ONNX;
  } else if (format == ModelFormat::TORCHSCRIPT) {
    model_file = model_path;
    model_format = ModelFormat::TORCHSCRIPT;
  } else {
    FDASSERT(false,
             "The model format only can be "
             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
  }
}

void RuntimeOption::SetModelBuffer(const char* model_buffer,
                                   size_t model_buffer_size,
                                   const char* params_buffer,
                                   size_t params_buffer_size,
                                   const ModelFormat& format) {
  model_buffer_size_ = model_buffer_size;
  params_buffer_size_ = params_buffer_size;
  model_from_memory_ = true;
  if (format == ModelFormat::PADDLE) {
    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
    params_buffer_ =
        std::string(params_buffer, params_buffer + params_buffer_size);
    model_format = ModelFormat::PADDLE;
  } else if (format == ModelFormat::ONNX) {
    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
    model_format = ModelFormat::ONNX;
  } else if (format == ModelFormat::TORCHSCRIPT) {
    model_buffer_ = std::string(model_buffer, model_buffer + model_buffer_size);
    model_format = ModelFormat::TORCHSCRIPT;
  } else {
    FDASSERT(false,
             "The model format only can be "
             "ModelFormat::PADDLE/ModelFormat::ONNX/ModelFormat::TORCHSCRIPT.");
  }
}

void RuntimeOption::UseGpu(int gpu_id) {
#ifdef WITH_GPU
  device = Device::GPU;
  device_id = gpu_id;
#else
  FDWARNING << "FastDeploy was not compiled with GPU, will force to use CPU."
            << std::endl;
  device = Device::CPU;
#endif
}

void RuntimeOption::UseCpu() { device = Device::CPU; }

void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
                              fastdeploy::rknpu2::CoreMask rknpu2_core) {
  rknpu2_cpu_name_ = rknpu2_name;
  rknpu2_core_mask_ = rknpu2_core;
  device = Device::RKNPU;
}

void RuntimeOption::UseTimVX() {
  enable_timvx = true;
  device = Device::TIMVX;
}

void RuntimeOption::UseKunlunXin(int kunlunxin_id, int l3_workspace_size,
                                 bool locked, bool autotune,
                                 const std::string& autotune_file,
                                 const std::string& precision,
                                 bool adaptive_seqlen,
                                 bool enable_multi_stream) {
  enable_kunlunxin = true;
  device_id = kunlunxin_id;
  kunlunxin_l3_workspace_size = l3_workspace_size;
  kunlunxin_locked = locked;
  kunlunxin_autotune = autotune;
  kunlunxin_autotune_file = autotune_file;
  kunlunxin_precision = precision;
  kunlunxin_adaptive_seqlen = adaptive_seqlen;
  kunlunxin_enable_multi_stream = enable_multi_stream;
  device = Device::KUNLUNXIN;
}

void RuntimeOption::UseAscend() {
  enable_ascend = true;
  device = Device::ASCEND;
}

void RuntimeOption::UseSophgo() {
  device = Device::SOPHGOTPUD;
  UseSophgoBackend();
}

void RuntimeOption::SetExternalStream(void* external_stream) {
  external_stream_ = external_stream;
}

void RuntimeOption::SetCpuThreadNum(int thread_num) {
  FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
  cpu_thread_num = thread_num;
}

void RuntimeOption::SetOrtGraphOptLevel(int level) {
  std::vector<int> supported_level{-1, 0, 1, 2};
  auto valid_level = std::find(supported_level.begin(), supported_level.end(),
                               level) != supported_level.end();
  FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
  ort_graph_opt_level = level;
}

// use paddle inference backend
void RuntimeOption::UsePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
  backend = Backend::PDINFER;
#else
  FDASSERT(false, "FastDeploy was not compiled with Paddle Inference.");
#endif
}

// use onnxruntime backend
void RuntimeOption::UseOrtBackend() {
#ifdef ENABLE_ORT_BACKEND
  backend = Backend::ORT;
#else
  FDASSERT(false, "FastDeploy was not compiled with OrtBackend.");
#endif
}

// use sophgo runtime backend
void RuntimeOption::UseSophgoBackend() {
#ifdef ENABLE_SOPHGO_BACKEND
  backend = Backend::SOPHGOTPU;
#else
  FDASSERT(false, "FastDeploy was not compiled with SophgoBackend.");
#endif
}

// use poros backend
void RuntimeOption::UsePorosBackend() {
#ifdef ENABLE_POROS_BACKEND
  backend = Backend::POROS;
#else
  FDASSERT(false, "FastDeploy was not compiled with PorosBackend.");
#endif
}

void RuntimeOption::UseTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
  backend = Backend::TRT;
#else
  FDASSERT(false, "FastDeploy was not compiled with TrtBackend.");
#endif
}

void RuntimeOption::UseOpenVINOBackend() {
#ifdef ENABLE_OPENVINO_BACKEND
  backend = Backend::OPENVINO;
#else
  FDASSERT(false, "FastDeploy was not compiled with OpenVINO.");
#endif
}

void RuntimeOption::UseLiteBackend() {
#ifdef ENABLE_LITE_BACKEND
  backend = Backend::LITE;
#else
  FDASSERT(false, "FastDeploy was not compiled with Paddle Lite.");
#endif
}

void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) {
  pd_enable_mkldnn = pd_mkldnn;
}

void RuntimeOption::DeletePaddleBackendPass(const std::string& pass_name) {
  pd_delete_pass_names.push_back(pass_name);
}
void RuntimeOption::EnablePaddleLogInfo() { pd_enable_log_info = true; }

void RuntimeOption::DisablePaddleLogInfo() { pd_enable_log_info = false; }

void RuntimeOption::EnablePaddleToTrt() {
  FDASSERT(backend == Backend::TRT,
           "Should call UseTrtBackend() before calling EnablePaddleToTrt().");
#ifdef ENABLE_PADDLE_BACKEND
  FDINFO << "While using TrtBackend with EnablePaddleToTrt, FastDeploy will "
            "change to use Paddle Inference Backend."
         << std::endl;
  backend = Backend::PDINFER;
  pd_enable_trt = true;
#else
  FDASSERT(false,
           "While using TrtBackend with EnablePaddleToTrt, FastDeploy must be "
           "compiled with the Paddle Inference Backend; please rebuild "
           "FastDeploy.");
#endif
}

void RuntimeOption::SetPaddleMKLDNNCacheSize(int size) {
  FDASSERT(size > 0, "Parameter size must be greater than 0.");
  pd_mkldnn_cache_size = size;
}

void RuntimeOption::SetOpenVINODevice(const std::string& name) {
  openvino_device = name;
}

void RuntimeOption::EnableLiteFP16() { lite_enable_fp16 = true; }

void RuntimeOption::DisableLiteFP16() { lite_enable_fp16 = false; }
void RuntimeOption::EnableLiteInt8() { lite_enable_int8 = true; }

void RuntimeOption::DisableLiteInt8() { lite_enable_int8 = false; }
void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
  lite_power_mode = mode;
}

void RuntimeOption::SetLiteOptimizedModelDir(
    const std::string& optimized_model_dir) {
  lite_optimized_model_dir = optimized_model_dir;
}

void RuntimeOption::SetLiteSubgraphPartitionPath(
    const std::string& nnadapter_subgraph_partition_config_path) {
  lite_nnadapter_subgraph_partition_config_path =
      nnadapter_subgraph_partition_config_path;
}

void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer(
    const std::string& nnadapter_subgraph_partition_config_buffer) {
  lite_nnadapter_subgraph_partition_config_buffer =
      nnadapter_subgraph_partition_config_buffer;
}

void RuntimeOption::SetLiteDeviceNames(
    const std::vector<std::string>& nnadapter_device_names) {
  lite_nnadapter_device_names = nnadapter_device_names;
}

void RuntimeOption::SetLiteContextProperties(
    const std::string& nnadapter_context_properties) {
  lite_nnadapter_context_properties = nnadapter_context_properties;
}

void RuntimeOption::SetLiteModelCacheDir(
    const std::string& nnadapter_model_cache_dir) {
  lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir;
}

void RuntimeOption::SetLiteDynamicShapeInfo(
    const std::map<std::string, std::vector<std::vector<int64_t>>>&
        nnadapter_dynamic_shape_info) {
  lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info;
}

void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath(
    const std::string& nnadapter_mixed_precision_quantization_config_path) {
  lite_nnadapter_mixed_precision_quantization_config_path =
      nnadapter_mixed_precision_quantization_config_path;
}

void RuntimeOption::SetTrtInputShape(const std::string& input_name,
                                     const std::vector<int32_t>& min_shape,
                                     const std::vector<int32_t>& opt_shape,
                                     const std::vector<int32_t>& max_shape) {
  trt_min_shape[input_name].clear();
  trt_max_shape[input_name].clear();
  trt_opt_shape[input_name].clear();
  trt_min_shape[input_name].assign(min_shape.begin(), min_shape.end());
  if (opt_shape.size() == 0) {
    trt_opt_shape[input_name].assign(min_shape.begin(), min_shape.end());
  } else {
    trt_opt_shape[input_name].assign(opt_shape.begin(), opt_shape.end());
  }
  if (max_shape.size() == 0) {
    trt_max_shape[input_name].assign(min_shape.begin(), min_shape.end());
  } else {
    trt_max_shape[input_name].assign(max_shape.begin(), max_shape.end());
  }
}

void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) {
  trt_max_workspace_size = max_workspace_size;
}
void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size) {
  trt_max_batch_size = max_batch_size;
}

void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }

void RuntimeOption::DisableTrtFP16() { trt_enable_fp16 = false; }

void RuntimeOption::EnablePinnedMemory() { enable_pinned_memory = true; }

void RuntimeOption::DisablePinnedMemory() { enable_pinned_memory = false; }

void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
  trt_serialize_file = cache_file_path;
}

void RuntimeOption::SetOpenVINOStreams(int num_streams) {
  ov_num_streams = num_streams;
}

bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                      const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
  option = _option;
  auto poros_option = PorosBackendOption();
  poros_option.use_gpu = (option.device == Device::GPU) ? true : false;
  poros_option.gpu_id = option.device_id;
  poros_option.long_to_int = option.long_to_int;
  poros_option.use_nvidia_tf32 = option.use_nvidia_tf32;
  poros_option.unconst_ops_thres = option.unconst_ops_thres;
  poros_option.poros_file = option.poros_file;
  poros_option.is_dynamic = option.is_dynamic;
  poros_option.enable_fp16 = option.trt_enable_fp16;
  poros_option.max_batch_size = option.trt_max_batch_size;
  poros_option.max_workspace_size = option.trt_max_workspace_size;
  FDASSERT(
      option.model_format == ModelFormat::TORCHSCRIPT,
      "PorosBackend only supports model format of ModelFormat::TORCHSCRIPT.");
  backend_ = utils::make_unique<PorosBackend>();
  auto casted_backend = dynamic_cast<PorosBackend*>(backend_.get());
  FDASSERT(
      casted_backend->Compile(option.model_file, prewarm_tensors, poros_option),
      "Load model from Torchscript failed while initializing PorosBackend.");
#else
  FDASSERT(false,
           "PorosBackend is not available, please compile with "
           "ENABLE_POROS_BACKEND=ON.");
#endif
  return true;
}

void RuntimeOption::EnablePaddleTrtCollectShape() { pd_collect_shape = true; }

void RuntimeOption::DisablePaddleTrtCollectShape() { pd_collect_shape = false; }

void RuntimeOption::DisablePaddleTrtOPs(const std::vector<std::string>& ops) {
  trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());
}

void RuntimeOption::UseIpu(int device_num, int micro_batch_size,
                           bool enable_pipelining, int batches_per_step) {
#ifdef WITH_IPU
  device = Device::IPU;
  ipu_device_num = device_num;
  ipu_micro_batch_size = micro_batch_size;
  ipu_enable_pipelining = enable_pipelining;
  ipu_batches_per_step = batches_per_step;
#else
  FDWARNING << "FastDeploy was not compiled with IPU, will force to use CPU."
            << std::endl;
  device = Device::CPU;
#endif
}

void RuntimeOption::SetIpuConfig(bool enable_fp16, int replica_num,
                                 float available_memory_proportion,
                                 bool enable_half_partial) {
  ipu_enable_fp16 = enable_fp16;
  ipu_replica_num = replica_num;
  ipu_available_memory_proportion = available_memory_proportion;
  ipu_enable_half_partial = enable_half_partial;
}

}  // namespace fastdeploy
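Not part of the diff: a sketch of how the TensorRT-related setters above are typically combined. The model paths, the input name "x", the shape ranges, and the cache path are hypothetical.

fastdeploy::RuntimeOption option;
option.SetModelPath("model.pdmodel", "model.pdiparams");  // hypothetical paths
option.UseGpu(0);
option.UseTrtBackend();
// Dynamic shape range for a hypothetical input named "x": min / opt / max.
option.SetTrtInputShape("x", {1, 3, 224, 224}, {8, 3, 224, 224},
                        {32, 3, 224, 224});
option.EnableTrtFP16();
// Cache the serialized engine so later runs can skip the TensorRT build step.
option.SetTrtCacheFile("trt_engine.cache");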
482  fastdeploy/runtime/runtime_option.h  Normal file
@@ -0,0 +1,482 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*! \file runtime_option.h
    \brief A brief file description.

    More details
 */

#pragma once

#include <algorithm>
#include <map>
#include <vector>
#include "fastdeploy/runtime/enum_variables.h"
#include "fastdeploy/backends/lite/option.h"
#include "fastdeploy/backends/openvino/option.h"
#include "fastdeploy/backends/ort/option.h"
#include "fastdeploy/backends/paddle/option.h"
#include "fastdeploy/backends/poros/option.h"
#include "fastdeploy/backends/rknpu2/option.h"
#include "fastdeploy/backends/sophgo/option.h"
#include "fastdeploy/backends/tensorrt/option.h"

namespace fastdeploy {

/**
|
||||||
|
* @brief Get all the available inference backend in FastDeploy
|
||||||
|
*/
|
||||||
|
FASTDEPLOY_DECL std::vector<Backend> GetAvailableBackends();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Check if the inference backend available
|
||||||
|
*/
|
||||||
|
FASTDEPLOY_DECL bool IsBackendAvailable(const Backend& backend);
|
||||||
|
|
||||||
|
bool CheckModelFormat(const std::string& model_file,
|
||||||
|
const ModelFormat& model_format);
|
||||||
|
ModelFormat GuessModelFormat(const std::string& model_file);
|
||||||
|
|
||||||
|
/*! @brief Option object used when create a new Runtime object
|
||||||
|
*/
|
||||||
|
struct FASTDEPLOY_DECL RuntimeOption {
|
||||||
|
/** \brief Set path of model file and parameter file
|
||||||
|
*
|
||||||
|
* \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model
|
||||||
|
* \param[in] params_path Path of parameter file, this only used when the model format is Paddle, e.g Resnet50/model.pdiparams
|
||||||
|
* \param[in] format Format of the loaded model
|
||||||
|
*/
|
||||||
|
void SetModelPath(const std::string& model_path,
|
||||||
|
const std::string& params_path = "",
|
||||||
|
const ModelFormat& format = ModelFormat::PADDLE);
|
||||||
|
|
||||||
|
/** \brief Specify the memory buffer of model and parameter. Used when model and params are loaded directly from memory
|
||||||
|
*
|
||||||
|
* \param[in] model_buffer The memory buffer of model
|
||||||
|
* \param[in] model_buffer_size The size of the model data
|
||||||
|
* \param[in] params_buffer The memory buffer of the combined parameters file
|
||||||
|
* \param[in] params_buffer_size The size of the combined parameters data
|
||||||
|
* \param[in] format Format of the loaded model
|
||||||
|
*/
|
||||||
|
void SetModelBuffer(const char* model_buffer, size_t model_buffer_size,
|
||||||
|
const char* params_buffer, size_t params_buffer_size,
|
||||||
|
const ModelFormat& format = ModelFormat::PADDLE);
|
||||||
|
|
||||||
|
  /// Use CPU to run inference; the runtime runs on CPU by default
  void UseCpu();

  /// Use NVIDIA GPU to run inference
  void UseGpu(int gpu_id = 0);

  /// Use RKNPU2 to run inference
  void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name =
                     fastdeploy::rknpu2::CpuName::RK3588,
                 fastdeploy::rknpu2::CoreMask rknpu2_core =
                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);

  /// Use TimVX to run inference
  void UseTimVX();

  /// Use Huawei Ascend to run inference
  void UseAscend();

  ///
  /// \brief Turn on KunlunXin XPU.
  ///
  /// \param kunlunxin_id The KunlunXin XPU card to use (default is 0).
  /// \param l3_workspace_size The size of the device memory allocated for the
  ///        L3 cache; the maximum is 16 MB.
  /// \param locked Whether the allocated L3 cache can be locked. If false,
  ///        the L3 cache is not locked and can be shared by multiple models;
  ///        models sharing the L3 cache are executed sequentially on the card.
  /// \param autotune Whether to autotune the conv operators in the model. If
  ///        true, when a conv operator of a given shape is executed for the
  ///        first time, a better algorithm is searched for automatically to
  ///        speed up subsequent conv operators of the same shape.
  /// \param autotune_file Path of the autotune file. If specified, the
  ///        algorithms recorded in the file are used and autotuning is not
  ///        performed again.
  /// \param precision Computation precision of multi_encoder.
  /// \param adaptive_seqlen Whether the input of multi_encoder is variable length.
  /// \param enable_multi_stream Whether to enable multi-stream execution on
  ///        KunlunXin XPU.
  ///
  void UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00,
                    bool locked = false, bool autotune = true,
                    const std::string& autotune_file = "",
                    const std::string& precision = "int16",
                    bool adaptive_seqlen = false,
                    bool enable_multi_stream = false);

  /// Use Sophgo to run inference
  void UseSophgo();

  /// Set an external stream (e.g. a CUDA stream) for the runtime to execute on
  void SetExternalStream(void* external_stream);

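  // Example (editorial sketch, not part of the upstream header): choosing a
  // device. Only one of these would normally be called on a given option.
  //
  //   fastdeploy::RuntimeOption option;
  //   option.UseGpu(0);          // run on NVIDIA GPU 0
  //   // option.UseKunlunXin();  // or run on KunlunXin XPU card 0 with defaults
  //   // option.UseCpu();        // or fall back to CPU
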
  /**
   * @brief Set the number of CPU threads used while running inference on CPU; by default the value is decided by each backend
   */
  void SetCpuThreadNum(int thread_num);

  /// Set the ONNX Runtime graph optimization level; by default it is decided by ONNX Runtime itself
  void SetOrtGraphOptLevel(int level = -1);

  /// Set Paddle Inference as the inference backend, supports CPU/GPU
  void UsePaddleBackend();

  /// Wrapper function of UsePaddleBackend()
  void UsePaddleInferBackend() { return UsePaddleBackend(); }

  /// Set ONNX Runtime as the inference backend, supports CPU/GPU
  void UseOrtBackend();

  /// Set SOPHGO Runtime as the inference backend, supports Sophgo devices
  void UseSophgoBackend();

  /// Set TensorRT as the inference backend, supports GPU only
  void UseTrtBackend();

  /// Set Poros as the inference backend, supports CPU/GPU
  void UsePorosBackend();

  /// Set OpenVINO as the inference backend, supports CPU (and Intel GPU via SetOpenVINODevice)
  void UseOpenVINOBackend();

  /// Set Paddle Lite as the inference backend, supports Arm CPU only
  void UseLiteBackend();

  /// Wrapper function of UseLiteBackend()
  void UsePaddleLiteBackend() { return UseLiteBackend(); }

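  // Example (editorial sketch, not part of the upstream header): explicitly
  // selecting a backend instead of letting FastDeploy pick one.
  //
  //   fastdeploy::RuntimeOption option;
  //   option.UseGpu(0);
  //   option.UseTrtBackend();             // TensorRT (GPU only)
  //   // option.UsePaddleInferBackend();  // or Paddle Inference
  //   // option.UseOrtBackend();          // or ONNX Runtime
  //   option.SetCpuThreadNum(8);          // relevant when running on CPU
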
  /// Enable or disable MKL-DNN while using Paddle Inference as the inference backend
  void SetPaddleMKLDNN(bool pd_mkldnn = true);

  /**
   * @brief If the TensorRT backend is selected, EnablePaddleToTrt switches to the Paddle Inference backend and uses its integrated TensorRT instead.
   */
  void EnablePaddleToTrt();

  /**
   * @brief Delete a pass by name while using Paddle Inference as the inference backend; this can be called multiple times to delete a set of passes
   */
  void DeletePaddleBackendPass(const std::string& delete_pass_name);

  /**
   * @brief Enable printing of debug information while using Paddle Inference as the inference backend; the backend disables it by default
   */
  void EnablePaddleLogInfo();

  /**
   * @brief Disable printing of debug information while using Paddle Inference as the inference backend
   */
  void DisablePaddleLogInfo();

  /**
   * @brief Set the shape cache size while using Paddle Inference with MKL-DNN; by default all different input shapes are cached
   */
  void SetPaddleMKLDNNCacheSize(int size);

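  // Example (editorial sketch, not part of the upstream header): tuning the
  // Paddle Inference backend. The pass name below is only an illustration,
  // not a recommendation.
  //
  //   fastdeploy::RuntimeOption option;
  //   option.UsePaddleInferBackend();
  //   option.SetPaddleMKLDNN(true);
  //   option.SetPaddleMKLDNNCacheSize(10);
  //   option.DeletePaddleBackendPass("some_fuse_pass");  // hypothetical pass name
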
  /**
   * @brief Set the device name for OpenVINO; default is 'CPU', can also be 'AUTO', 'GPU', 'GPU.1', ...
   */
  void SetOpenVINODevice(const std::string& name = "CPU");

  /**
   * @brief Set shape info for OpenVINO
   */
  void SetOpenVINOShapeInfo(
      const std::map<std::string, std::vector<int64_t>>& shape_info) {
    ov_shape_infos = shape_info;
  }

  /**
   * @brief When using the OpenVINO backend with an Intel GPU, use this interface to specify the operators that should run on CPU
   */
  void SetOpenVINOCpuOperators(const std::vector<std::string>& operators) {
    ov_cpu_operators = operators;
  }

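  // Example (editorial sketch, not part of the upstream header): configuring
  // the OpenVINO backend. The input name "x" and its shape are placeholders.
  //
  //   fastdeploy::RuntimeOption option;
  //   option.UseOpenVINOBackend();
  //   option.SetOpenVINODevice("CPU");
  //   option.SetOpenVINOShapeInfo({{"x", {1, 3, 224, 224}}});
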
  /**
   * @brief Set the optimized model dir for the Paddle Lite backend.
   */
  void SetLiteOptimizedModelDir(const std::string& optimized_model_dir);

  /**
   * @brief Set the subgraph partition config path for the Paddle Lite backend.
   */
  void SetLiteSubgraphPartitionPath(
      const std::string& nnadapter_subgraph_partition_config_path);

  /**
   * @brief Set the subgraph partition config buffer for the Paddle Lite backend.
   */
  void SetLiteSubgraphPartitionConfigBuffer(
      const std::string& nnadapter_subgraph_partition_config_buffer);

  /**
   * @brief Set the NNAdapter device names for the Paddle Lite backend.
   */
  void SetLiteDeviceNames(
      const std::vector<std::string>& nnadapter_device_names);

  /**
   * @brief Set the NNAdapter context properties for the Paddle Lite backend.
   */
  void SetLiteContextProperties(
      const std::string& nnadapter_context_properties);

  /**
   * @brief Set the model cache dir for the Paddle Lite backend.
   */
  void SetLiteModelCacheDir(const std::string& nnadapter_model_cache_dir);

  /**
   * @brief Set dynamic shape info for the Paddle Lite backend.
   */
  void SetLiteDynamicShapeInfo(
      const std::map<std::string, std::vector<std::vector<int64_t>>>&
          nnadapter_dynamic_shape_info);

  /**
   * @brief Set the mixed precision quantization config path for the Paddle Lite backend.
   */
  void SetLiteMixedPrecisionQuantizationConfigPath(
      const std::string& nnadapter_mixed_precision_quantization_config_path);

  /**
   * @brief Enable half precision (FP16) inference while using the Paddle Lite backend
   */
  void EnableLiteFP16();

  /**
   * @brief Disable half precision and switch back to full precision (FP32)
   */
  void DisableLiteFP16();

  /**
   * @brief Enable int8 precision while using the Paddle Lite backend
   */
  void EnableLiteInt8();

  /**
   * @brief Disable int8 precision and switch back to full precision (FP32)
   */
  void DisableLiteInt8();

  /**
   * @brief Set the power mode while using Paddle Lite as the inference backend: 0: LITE_POWER_HIGH; 1: LITE_POWER_LOW; 2: LITE_POWER_FULL; 3: LITE_POWER_NO_BIND; 4: LITE_POWER_RAND_HIGH; 5: LITE_POWER_RAND_LOW. Refer to [Paddle Lite](https://paddle-lite.readthedocs.io/zh/latest/api_reference/cxx_api_doc.html#set-power-mode) for more details
   */
  void SetLitePowerMode(LitePowerMode mode);

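  // Example (editorial sketch, not part of the upstream header): typical
  // Paddle Lite settings for an Arm CPU deployment.
  //
  //   fastdeploy::RuntimeOption option;
  //   option.UseCpu();
  //   option.UseLiteBackend();
  //   option.EnableLiteFP16();  // only takes effect on hardware with FP16 support
  //   option.SetLitePowerMode(fastdeploy::LitePowerMode::LITE_POWER_HIGH);
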
  /** \brief Set the shape range of an input tensor for models with dynamic input shapes while using the TensorRT backend
   *
   * \param[in] input_name Name of the model input that has a dynamic shape
   * \param[in] min_shape The minimal shape for the input tensor
   * \param[in] opt_shape The optimized shape for the input tensor; set it to the most common shape. If left as the default value, it stays the same as min_shape
   * \param[in] max_shape The maximum shape for the input tensor. If left as the default value, it stays the same as min_shape
   */
  void SetTrtInputShape(
      const std::string& input_name, const std::vector<int32_t>& min_shape,
      const std::vector<int32_t>& opt_shape = std::vector<int32_t>(),
      const std::vector<int32_t>& max_shape = std::vector<int32_t>());

  /// Set max_workspace_size for TensorRT, default 1<<30
  void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);

  /// Set max_batch_size for TensorRT, default 32
  void SetTrtMaxBatchSize(size_t max_batch_size);

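  // Example (editorial sketch, not part of the upstream header): a TensorRT
  // setup with one dynamic input. The input name "image" and the shapes are
  // placeholders.
  //
  //   fastdeploy::RuntimeOption option;
  //   option.UseGpu(0);
  //   option.UseTrtBackend();
  //   option.SetTrtInputShape("image", {1, 3, 224, 224},    // min
  //                           {8, 3, 224, 224},              // opt
  //                           {16, 3, 224, 224});            // max
  //   option.SetTrtMaxWorkspaceSize(1 << 30);
  //   option.SetTrtCacheFile("trt_engine.cache");  // cache the built engine
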
  /**
   * @brief Enable FP16 inference while using the TensorRT backend. Note: not all GPU devices support FP16; on devices without FP16 support, FastDeploy automatically falls back to FP32
   */
  void EnableTrtFP16();

  /// Disable FP16 inference while using the TensorRT backend
  void DisableTrtFP16();

  /**
   * @brief Set the cache file path while using the TensorRT backend. Loading a Paddle/ONNX model and building the TensorRT engine takes a long time; with this interface the TensorRT engine is saved to `cache_file_path` and loaded directly the next time the code runs
   */
  void SetTrtCacheFile(const std::string& cache_file_path);

  /**
   * @brief Enable pinned memory. Pinned memory can be used to speed up data transfer between CPU and GPU. Currently it is only supported in the TensorRT backend and the Paddle Inference backend.
   */
  void EnablePinnedMemory();

  /**
   * @brief Disable pinned memory
   */
  void DisablePinnedMemory();

  /**
   * @brief Enable shape collection in the Paddle-TRT backend
   */
  void EnablePaddleTrtCollectShape();

  /**
   * @brief Disable shape collection in the Paddle-TRT backend
   */
  void DisablePaddleTrtCollectShape();

  /**
   * @brief Prevent the given ops from running in the Paddle-TRT backend
   */
  void DisablePaddleTrtOPs(const std::vector<std::string>& ops);

  /**
   * @brief Set the number of streams used by the OpenVINO backend
   */
  void SetOpenVINOStreams(int num_streams);

  /** \brief Use Graphcore IPU to run inference.
   *
   * \param[in] device_num The number of IPUs.
   * \param[in] micro_batch_size The batch size in the graph; only effective when the graph has no batch shape info.
   * \param[in] enable_pipelining Whether to enable pipelining.
   * \param[in] batches_per_step The number of batches per run in pipelining.
   */
  void UseIpu(int device_num = 1, int micro_batch_size = 1,
              bool enable_pipelining = false, int batches_per_step = 1);

  /** \brief Set the IPU config.
   *
   * \param[in] enable_fp16 Whether to enable FP16.
   * \param[in] replica_num The number of graph replicas.
   * \param[in] available_memory_proportion The available memory proportion for matmul/conv.
   * \param[in] enable_half_partial Whether to enable FP16 partials for matmul; only effective with FP16.
   */
  void SetIpuConfig(bool enable_fp16 = false, int replica_num = 1,
                    float available_memory_proportion = 1.0,
                    bool enable_half_partial = false);

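  // Example (editorial sketch, not part of the upstream header): running on a
  // single Graphcore IPU with FP16 enabled.
  //
  //   fastdeploy::RuntimeOption option;
  //   option.UseIpu(/*device_num=*/1, /*micro_batch_size=*/1);
  //   option.SetIpuConfig(/*enable_fp16=*/true);
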
  Backend backend = Backend::UNKNOWN;
  // For CPU inference and preprocessing; -1 lets each backend choose its own
  // default value
  int cpu_thread_num = -1;
  int device_id = 0;

  Device device = Device::CPU;

  void* external_stream_ = nullptr;

  bool enable_pinned_memory = false;

  // ======Only for ORT Backend========
  // -1 means use the default value chosen by ONNX Runtime
  // 0: ORT_DISABLE_ALL 1: ORT_ENABLE_BASIC 2: ORT_ENABLE_EXTENDED 3: ORT_ENABLE_ALL
  int ort_graph_opt_level = -1;
  int ort_inter_op_num_threads = -1;
  // 0: ORT_SEQUENTIAL 1: ORT_PARALLEL
  int ort_execution_mode = -1;

  // ======Only for Paddle Backend=====
  bool pd_enable_mkldnn = true;
  bool pd_enable_log_info = false;
  bool pd_enable_trt = false;
  bool pd_collect_shape = false;
  int pd_mkldnn_cache_size = 1;
  std::vector<std::string> pd_delete_pass_names;

  // ======Only for Paddle IPU Backend =======
  int ipu_device_num = 1;
  int ipu_micro_batch_size = 1;
  bool ipu_enable_pipelining = false;
  int ipu_batches_per_step = 1;
  bool ipu_enable_fp16 = false;
  int ipu_replica_num = 1;
  float ipu_available_memory_proportion = 1.0;
  bool ipu_enable_half_partial = false;

  // ======Only for Paddle Lite Backend=====
  // 0: LITE_POWER_HIGH 1: LITE_POWER_LOW 2: LITE_POWER_FULL
  // 3: LITE_POWER_NO_BIND 4: LITE_POWER_RAND_HIGH
  // 5: LITE_POWER_RAND_LOW
  LitePowerMode lite_power_mode = LitePowerMode::LITE_POWER_NO_BIND;
  // Enable int8 or not
  bool lite_enable_int8 = false;
  // Enable fp16 or not
  bool lite_enable_fp16 = false;
  // Optimized model dir for CxxConfig
  std::string lite_optimized_model_dir = "";
  std::string lite_nnadapter_subgraph_partition_config_path = "";
  // Other NNAdapter settings for CxxConfig
  std::string lite_nnadapter_subgraph_partition_config_buffer = "";
  std::string lite_nnadapter_context_properties = "";
  std::string lite_nnadapter_model_cache_dir = "";
  std::string lite_nnadapter_mixed_precision_quantization_config_path = "";
  std::map<std::string, std::vector<std::vector<int64_t>>>
      lite_nnadapter_dynamic_shape_info = {{"", {{0}}}};
  std::vector<std::string> lite_nnadapter_device_names = {};

  bool enable_timvx = false;
  bool enable_ascend = false;
  bool enable_kunlunxin = false;

  // ======Only for Trt Backend=======
  std::map<std::string, std::vector<int32_t>> trt_max_shape;
  std::map<std::string, std::vector<int32_t>> trt_min_shape;
  std::map<std::string, std::vector<int32_t>> trt_opt_shape;
  std::string trt_serialize_file = "";
  bool trt_enable_fp16 = false;
  bool trt_enable_int8 = false;
  size_t trt_max_batch_size = 1;
  size_t trt_max_workspace_size = 1 << 30;
  // ======Only for PaddleTrt Backend=======
  std::vector<std::string> trt_disabled_ops_{};

  // ======Only for Poros Backend=======
  bool is_dynamic = false;
  bool long_to_int = true;
  bool use_nvidia_tf32 = false;
  int unconst_ops_thres = -1;
  std::string poros_file = "";

  // ======Only for OpenVINO Backend=======
  int ov_num_streams = 0;
  std::string openvino_device = "CPU";
  std::map<std::string, std::vector<int64_t>> ov_shape_infos;
  std::vector<std::string> ov_cpu_operators;

  // ======Only for RKNPU2 Backend=======
  fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ =
      fastdeploy::rknpu2::CpuName::RK3588;
  fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ =
      fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;

  // ======Only for KunlunXin XPU Backend=======
  int kunlunxin_l3_workspace_size = 0xfffc00;
  bool kunlunxin_locked = false;
  bool kunlunxin_autotune = true;
  std::string kunlunxin_autotune_file = "";
  std::string kunlunxin_precision = "int16";
  bool kunlunxin_adaptive_seqlen = false;
  bool kunlunxin_enable_multi_stream = false;

  std::string model_file = "";   // Path of the model file
  std::string params_file = "";  // Path of the parameters file, may be empty
  // Format of the input model
  ModelFormat model_format = ModelFormat::PADDLE;

  std::string model_buffer_ = "";
  std::string params_buffer_ = "";
  size_t model_buffer_size_ = 0;
  size_t params_buffer_size_ = 0;
  bool model_from_memory_ = false;
};

}  // namespace fastdeploy

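A minimal end-to-end sketch (editorial addition, not part of this commit) tying the options together; it assumes the Runtime class and its Init(option) interface declared elsewhere in the runtime module:

    fastdeploy::RuntimeOption option;
    option.SetModelPath("model/inference.pdmodel", "model/inference.pdiparams");
    option.UseGpu(0);
    option.UseTrtBackend();
    option.EnableTrtFP16();

    fastdeploy::Runtime runtime;
    bool ok = runtime.Init(option);  // Runtime::Init is assumed here; check the return value before running inference
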
@@ -75,14 +75,14 @@ YOLOv7End2EndTRT::YOLOv7End2EndTRT(const std::string& model_file,
   runtime_option.model_format = model_format;
   runtime_option.model_file = model_file;
   if (runtime_option.device != Device::GPU) {
-    FDWARNING << Str(runtime_option.device)
+    FDWARNING << runtime_option.device
               << " is not support for YOLOv7End2EndTRT,"
               << "will fallback to Device::GPU." << std::endl;
     runtime_option.device = Device::GPU;
   }
   if (runtime_option.backend != Backend::UNKNOWN) {
     if (runtime_option.backend != Backend::TRT) {
-      FDWARNING << Str(runtime_option.backend)
+      FDWARNING << runtime_option.backend
                 << " is not support for YOLOv7End2EndTRT,"
                 << "will fallback to Backend::TRT." << std::endl;
       runtime_option.backend = Backend::TRT;
@@ -347,4 +347,4 @@ bool YOLOv7End2EndTRT::Predict(cv::Mat* im, DetectionResult* result,

 }  // namespace detection
 }  // namespace vision
 }  // namespace fastdeploy