[Backend] Enable TensorRT BatchedNMSDynamic_TRT plugin (#449)

* Enable TensorRT EfficientNMS plugin
* remove some temporary code
* Update trt_backend.cc
* Update utils.h
@@ -50,7 +50,6 @@ if(ANDROID)
 endif()
 
 ############################# Basic Options for FastDeploy ################################
-option(ENABLE_PADDLE_FRONTEND "Whether to enable PaddlePaddle frontend to support load paddle model in fastdeploy." ON)
 option(WITH_GPU "Whether WITH_GPU=ON, will enable onnxruntime-gpu/paddle-infernce-gpu/poros-gpu" OFF)
 option(WITH_IPU "Whether WITH_IPU=ON, will enable paddle-infernce-ipu" OFF)
 option(ENABLE_ORT_BACKEND "Whether to enable onnxruntime backend." OFF)
@@ -190,13 +189,8 @@ if(WITH_SW)
   add_definitions(-DEIGEN_AVOID_THREAD_LOCAL)
 endif()
 
-if(ENABLE_PADDLE_FRONTEND)
-  add_definitions(-DENABLE_PADDLE_FRONTEND)
-  include(${PROJECT_SOURCE_DIR}/cmake/paddle2onnx.cmake)
-  list(APPEND DEPEND_LIBS external_paddle2onnx)
-endif(ENABLE_PADDLE_FRONTEND)
-
 if(ENABLE_ORT_BACKEND)
+  set(ENABLE_PADDLE_FRONTEND ON)
   add_definitions(-DENABLE_ORT_BACKEND)
   list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS})
   include(${PROJECT_SOURCE_DIR}/cmake/onnxruntime.cmake)
@@ -224,6 +218,7 @@ if(ENABLE_PADDLE_BACKEND)
 endif()
 
 if(ENABLE_OPENVINO_BACKEND)
+  set(ENABLE_PADDLE_FRONTEND ON)
   add_definitions(-DENABLE_OPENVINO_BACKEND)
   list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_OPENVINO_SRCS})
   include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake)
@@ -329,6 +324,7 @@ if(WITH_IPU)
 endif()
 
 if(ENABLE_TRT_BACKEND)
+  set(ENABLE_PADDLE_FRONTEND ON)
   if(APPLE OR ANDROID OR IOS)
     message(FATAL_ERROR "Cannot enable tensorrt backend in mac/ios/android os, please set -DENABLE_TRT_BACKEND=OFF.")
   endif()
@@ -382,7 +378,6 @@ endif()
 
 if(ENABLE_VISION)
   add_definitions(-DENABLE_VISION)
-  # set(ENABLE_VISION_VISUALIZE ON)
   add_definitions(-DENABLE_VISION_VISUALIZE)
   if(ENABLE_OPENCV_CUDA)
     if(NOT WITH_GPU)
@@ -424,6 +419,13 @@ if(ENABLE_TEXT)
   include(${PROJECT_SOURCE_DIR}/cmake/faster_tokenizer.cmake)
 endif()
 
+if(ENABLE_PADDLE_FRONTEND)
+  add_definitions(-DENABLE_PADDLE_FRONTEND)
+  include(${PROJECT_SOURCE_DIR}/cmake/paddle2onnx.cmake)
+  list(APPEND DEPEND_LIBS external_paddle2onnx)
+endif(ENABLE_PADDLE_FRONTEND)
+
+
 configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY)
 configure_file(${PROJECT_SOURCE_DIR}/python/fastdeploy/c_lib_wrap.py.in ${PROJECT_SOURCE_DIR}/python/fastdeploy/c_lib_wrap.py)
 configure_file(${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py.in ${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py)
@@ -43,13 +43,14 @@ else()
 endif(WIN32)
 
 set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
-set(PADDLE2ONNX_VERSION "1.0.1")
+set(PADDLE2ONNX_VERSION "1.0.2rc")
 if(WIN32)
   set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
   if(NOT CMAKE_CL_64)
     set(PADDLE2ONNX_FILE "paddle2onnx-win-x86-${PADDLE2ONNX_VERSION}.zip")
   endif()
 elseif(APPLE)
+  set(PADDLE2ONNX_VERSION "1.0.1")
   if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "arm64")
     set(PADDLE2ONNX_FILE "paddle2onnx-osx-arm64-${PADDLE2ONNX_VERSION}.tgz")
   else()
@@ -80,21 +80,18 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
             << std::endl;
     return false;
   }
-#ifdef ENABLE_PADDLE_FRONTEND
   char* model_content_ptr;
   int model_content_size = 0;
 
-  std::vector<paddle2onnx::CustomOp> custom_ops;
-  for (auto& item : option.custom_op_info_) {
-    paddle2onnx::CustomOp op;
-    strcpy(op.op_name, item.first.c_str());
-    strcpy(op.export_op_name, item.second.c_str());
-    custom_ops.emplace_back(op);
-  }
+#ifdef ENABLE_PADDLE_FRONTEND
+  paddle2onnx::CustomOp op;
+  strcpy(op.op_name, "multiclass_nms3");
+  strcpy(op.export_op_name, "MultiClassNMS");
   if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                            &model_content_ptr, &model_content_size, 11, true,
-                           verbose, true, true, true, custom_ops.data(),
-                           custom_ops.size())) {
+                           verbose, true, true, true, &op,
+                           1)) {
     FDERROR << "Error occured while export PaddlePaddle to ONNX format."
             << std::endl;
     return false;
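
Note: with this hunk the ORT path no longer reads custom ops from the runtime option; the multiclass_nms3 -> MultiClassNMS mapping is hardcoded at export time. A minimal sketch of the resulting export call, assuming the paddle2onnx C API as used at this call site (the header path and the commented parameter names are my guesses; only the argument order is taken from the diff):

#include <cstring>
#include <string>

#include "paddle2onnx/converter.h"  // assumed header for CustomOp / Export

// Hypothetical helper, not repository code: export a Paddle model to ONNX
// while mapping multiclass_nms3 to the exporter's MultiClassNMS custom op.
bool ExportWithNMSPlugin(const std::string& model_file,
                         const std::string& params_file, bool verbose,
                         std::string* onnx_proto) {
  paddle2onnx::CustomOp op;
  std::strcpy(op.op_name, "multiclass_nms3");
  std::strcpy(op.export_op_name, "MultiClassNMS");
  char* buf = nullptr;
  int size = 0;
  if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(), &buf,
                           &size, /*opset=*/11, /*auto_upgrade?=*/true,
                           verbose, true, true, true, &op, /*op_count=*/1)) {
    return false;
  }
  onnx_proto->assign(buf, buf + size);  // take ownership of the ONNX proto
  delete[] buf;
  return true;
}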
@@ -106,7 +103,7 @@ bool OrtBackend::InitFromPaddle(const std::string& model_file,
   model_content_ptr = nullptr;
   return InitFromOnnx(onnx_model_proto, option, true);
 #else
-  FDERROR << "Didn't compile with PaddlePaddle frontend, you can try to "
+  FDERROR << "Didn't compile with PaddlePaddle Frontend, you can try to "
              "call `InitFromOnnx` instead."
           << std::endl;
 #endif
@@ -124,48 +124,20 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
   option_ = option;
 
 #ifdef ENABLE_PADDLE_FRONTEND
-  std::vector<paddle2onnx::CustomOp> custom_ops;
-  for (auto& item : option_.custom_op_info_) {
-    paddle2onnx::CustomOp op;
-    std::strcpy(op.op_name, item.first.c_str());
-    std::strcpy(op.export_op_name, item.second.c_str());
-    custom_ops.emplace_back(op);
-  }
   char* model_content_ptr;
   int model_content_size = 0;
   char* calibration_cache_ptr;
   int calibration_cache_size = 0;
   if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(),
                            &model_content_ptr, &model_content_size, 11, true,
-                           verbose, true, true, true, custom_ops.data(),
-                           custom_ops.size(), "tensorrt",
+                           verbose, true, true, true, nullptr,
+                           0, "tensorrt",
                            &calibration_cache_ptr, &calibration_cache_size)) {
     FDERROR << "Error occured while export PaddlePaddle to ONNX format."
             << std::endl;
     return false;
   }
 
-  if (option_.remove_multiclass_nms_) {
-    char* new_model = nullptr;
-    int new_model_size = 0;
-    if (!paddle2onnx::RemoveMultiClassNMS(model_content_ptr, model_content_size,
-                                          &new_model, &new_model_size)) {
-      FDERROR << "Try to remove MultiClassNMS failed." << std::endl;
-      return false;
-    }
-    delete[] model_content_ptr;
-    std::string onnx_model_proto(new_model, new_model + new_model_size);
-    delete[] new_model;
-    if (calibration_cache_size) {
-      std::string calibration_str(
-          calibration_cache_ptr,
-          calibration_cache_ptr + calibration_cache_size);
-      calibration_str_ = calibration_str;
-      delete[] calibration_cache_ptr;
-    }
-    return InitFromOnnx(onnx_model_proto, option, true);
-  }
-
   std::string onnx_model_proto(model_content_ptr,
                                model_content_ptr + model_content_size);
   delete[] model_content_ptr;
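
Note: the TensorRT path drops both the per-option custom-op table and the RemoveMultiClassNMS rewrite; NMS now stays in the exported graph, and only the optional INT8 calibration cache is kept. A condensed paraphrase of the surviving body of TrtBackend::InitFromPaddle (not a verbatim copy; member names are taken from the hunk above):

// Condensed flow after this hunk -- export unchanged, keep calibration cache.
char* model = nullptr;
int model_size = 0;
char* calib = nullptr;
int calib_size = 0;
if (!paddle2onnx::Export(model_file.c_str(), params_file.c_str(), &model,
                         &model_size, 11, true, verbose, true, true, true,
                         nullptr, 0, "tensorrt", &calib, &calib_size)) {
  return false;  // export failed
}
std::string onnx_model_proto(model, model + model_size);
delete[] model;
if (calib_size) {  // keep the INT8 calibration cache when paddle2onnx emits one
  calibration_str_ = std::string(calib, calib + calib_size);
  delete[] calib;
}
return InitFromOnnx(onnx_model_proto, option, true);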
@@ -73,10 +73,6 @@ struct TrtBackendOption {
   std::string serialize_file = "";
   bool enable_pinned_memory = false;
   void* external_stream_ = nullptr;
-
-  // inside parameter, maybe remove next version
-  bool remove_multiclass_nms_ = false;
-  std::map<std::string, std::string> custom_op_info_;
 };
 
 std::vector<int> toVec(const nvinfer1::Dims& dim);
@@ -675,10 +675,6 @@ void Runtime::CreateOrtBackend() {
   ort_option.gpu_id = option.device_id;
   ort_option.external_stream_ = option.external_stream_;
 
-  // TODO(jiangjiajun): inside usage, maybe remove this later
-  ort_option.remove_multiclass_nms_ = option.remove_multiclass_nms_;
-  ort_option.custom_op_info_ = option.custom_op_info_;
-
   FDASSERT(option.model_format == ModelFormat::PADDLE ||
                option.model_format == ModelFormat::ONNX,
            "OrtBackend only support model format of ModelFormat::PADDLE / "
@@ -715,10 +711,6 @@ void Runtime::CreateTrtBackend() {
   trt_option.enable_pinned_memory = option.enable_pinned_memory;
   trt_option.external_stream_ = option.external_stream_;
 
-  // TODO(jiangjiajun): inside usage, maybe remove this later
-  trt_option.remove_multiclass_nms_ = option.remove_multiclass_nms_;
-  trt_option.custom_op_info_ = option.custom_op_info_;
-
   FDASSERT(option.model_format == ModelFormat::PADDLE ||
                option.model_format == ModelFormat::ONNX,
            "TrtBackend only support model format of ModelFormat::PADDLE / "
fastdeploy/runtime.h (Executable file → Normal file)
@@ -338,12 +338,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
   std::string model_file = "";   // Path of model file
   std::string params_file = "";  // Path of parameters file, can be empty
   ModelFormat model_format = ModelFormat::AUTOREC;  // format of input model
-
-  // inside parameters, only for inside usage
-  // remove multiclass_nms in Paddle2ONNX
-  bool remove_multiclass_nms_ = false;
-  // for Paddle2ONNX to export custom operators
-  std::map<std::string, std::string> custom_op_info_;
 };
 
 /*! @brief Runtime object used to inference the loaded model on different devices
@@ -46,13 +46,6 @@ void PPYOLOE::GetNmsInfo() {
 }
 
 bool PPYOLOE::Initialize() {
-#ifdef ENABLE_PADDLE_FRONTEND
-  // remove multiclass_nms3 now
-  // this is a trick operation for ppyoloe while inference on trt
-  GetNmsInfo();
-  runtime_option.remove_multiclass_nms_ = true;
-  runtime_option.custom_op_info_["multiclass_nms3"] = "MultiClassNMS";
-#endif
   if (!BuildPreprocessPipelineFromConfig()) {
     FDERROR << "Failed to build preprocess pipeline from configuration file."
             << std::endl;
@@ -63,16 +56,6 @@ bool PPYOLOE::Initialize() {
     return false;
   }
 
-  if (has_nms_ && runtime_option.backend == Backend::TRT) {
-    FDINFO << "Detected operator multiclass_nms3 in your model, will replace "
-              "it with fastdeploy::backend::MultiClassNMS(background_label="
-           << background_label << ", keep_top_k=" << keep_top_k
-           << ", nms_eta=" << nms_eta << ", nms_threshold=" << nms_threshold
-           << ", score_threshold=" << score_threshold
-           << ", nms_top_k=" << nms_top_k << ", normalized=" << normalized
-           << ")." << std::endl;
-    has_nms_ = false;
-  }
   return true;
 }
 
@@ -198,6 +181,7 @@ bool PPYOLOE::Postprocess(std::vector<FDTensor>& infer_result,
   FDASSERT(infer_result[1].shape[0] == 1,
            "Only support batch = 1 in FastDeploy now.");
 
+  has_nms_ = true;
   if (!has_nms_) {
     int boxes_index = 0;
     int scores_index = 1;
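
Note: a plain reading of this one-line addition (my inference, not stated in the commit): forcing the flag to true immediately before the branch makes the manual NMS path unreachable, so Postprocess always decodes post-NMS outputs. A hypothetical reduction of the control flow, not repository code:

void PostprocessBranchSketch(bool& has_nms) {
  has_nms = true;  // forced: the exported model already contains NMS
  if (!has_nms) {
    // manual fastdeploy::backend::MultiClassNMS fallback -- now dead code
  } else {
    // decode post-NMS boxes and per-image counts (see the next hunk)
  }
}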
@@ -237,19 +221,23 @@ bool PPYOLOE::Postprocess(std::vector<FDTensor>& infer_result,
           nms.out_box_data[i * 6 + 4], nms.out_box_data[i * 6 + 5]});
     }
   } else {
-    int box_num = 0;
+    std::vector<int> num_boxes(infer_result[1].shape[0]);
     if (infer_result[1].dtype == FDDataType::INT32) {
-      box_num = *(static_cast<int32_t*>(infer_result[1].Data()));
+      int32_t* data = static_cast<int32_t*>(infer_result[1].Data());
+      for (size_t i = 0; i < infer_result[1].shape[0]; ++i) {
+        num_boxes[i] = static_cast<int>(data[i]);
+      }
     } else if (infer_result[1].dtype == FDDataType::INT64) {
-      box_num = *(static_cast<int64_t*>(infer_result[1].Data()));
-    } else {
-      FDASSERT(
-          false,
-          "The output box_num of PPYOLOE model should be type of int32/int64.");
+      int64_t* data = static_cast<int64_t*>(infer_result[1].Data());
+      for (size_t i = 0; i < infer_result[1].shape[0]; ++i) {
+        num_boxes[i] = static_cast<int>(data[i]);
+      }
     }
-    result->Reserve(box_num);
+
+    // Only support batch = 1 now
+    result->Reserve(num_boxes[0]);
     float* box_data = static_cast<float*>(infer_result[0].Data());
-    for (size_t i = 0; i < box_num; ++i) {
+    for (size_t i = 0; i < num_boxes[0]; ++i) {
       result->label_ids.push_back(box_data[i * 6]);
       result->scores.push_back(box_data[i * 6 + 1]);
       result->boxes.emplace_back(
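
Note: the layout assumed by the rewritten else-branch, reconstructed from the indexing above (the shape annotations are inferences, not from the commit): infer_result[0] is a float tensor whose rows are {label, score, x1, y1, x2, y2}, and infer_result[1] holds one valid-box count per image as int32 or int64. A minimal standalone sketch of reading batch 0, mirroring the code above (FDTensor and DetectionResult are replaced by plain types to keep it self-contained):

#include <vector>

struct Box { float label, score, x1, y1, x2, y2; };

// Hypothetical decoder for the assumed post-NMS layout, batch 0 only.
std::vector<Box> DecodeBatch0(const float* box_data,
                              const std::vector<int>& num_boxes) {
  std::vector<Box> out;
  out.reserve(num_boxes[0]);  // per-image count decoded from infer_result[1]
  for (int i = 0; i < num_boxes[0]; ++i) {
    out.push_back({box_data[i * 6 + 0],                        // label id
                   box_data[i * 6 + 1],                        // score
                   box_data[i * 6 + 2], box_data[i * 6 + 3],   // x1, y1
                   box_data[i * 6 + 4], box_data[i * 6 + 5]}); // x2, y2
  }
  return out;
}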