Fix outputs order of tensorrt (#18)

* fix trt output order

* Update trt_backend.cc
Jason committed 2022-07-14 19:19:56 +08:00 (committed by GitHub)
parent de7c06a309
commit 90061e11f5
4 changed files with 50 additions and 24 deletions
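
The core of this change: TensorRT may enumerate the outputs of the converted network in a different order than the original ONNX model declares them, so the backend now records the original order in a name-to-index map and writes each TensorRT output back into its original slot. Below is a minimal standalone sketch of that idea, not FastDeploy's actual code; the Tensor struct and the output names are illustrative assumptions.

// Minimal sketch: record the ONNX-declared output order, then remap
// outputs produced in TensorRT binding order back to that order.
#include <cassert>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Tensor {
  std::string name;
  std::vector<float> data;
};

int main() {
  // Output order declared by a hypothetical ONNX model.
  std::vector<std::string> onnx_outputs = {"scores", "boxes", "labels"};
  std::map<std::string, int> outputs_order;
  for (int i = 0; i < static_cast<int>(onnx_outputs.size()); ++i) {
    outputs_order[onnx_outputs[i]] = i;
  }

  // Outputs as TensorRT happens to produce them (different order).
  std::vector<Tensor> trt_outputs = {
      {"boxes", {1.f, 2.f}}, {"labels", {3.f}}, {"scores", {0.9f}}};

  // Remap each TensorRT output into the slot the ONNX model expects.
  std::vector<Tensor> outputs(trt_outputs.size());
  for (const auto& t : trt_outputs) {
    auto iter = outputs_order.find(t.name);
    assert(iter != outputs_order.end() && "output missing from original model");
    outputs[iter->second] = t;
  }

  for (const auto& t : outputs) {
    std::cout << t.name << std::endl;  // prints scores, boxes, labels
  }
  return 0;
}

The matching is done by name rather than by position because only the binding order reported by TensorRT changes; the output names themselves are preserved.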

@@ -43,7 +43,7 @@ else()
 endif(WIN32)
 set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/paddle2onnx/libs/")
-set(PADDLE2ONNX_VERSION "0.9.9")
+set(PADDLE2ONNX_VERSION "1.0.0rc1")
 if(WIN32)
   set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
 elseif(APPLE)

@@ -52,7 +52,7 @@ std::vector<int> toVec(const nvinfer1::Dims& dim) {
   return out;
 }
 
 bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
                              const TrtBackendOption& option) {
   if (initialized_) {
     FDERROR << "TrtBackend is already initlized, cannot initialize again."
@@ -139,17 +139,6 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
   }
   cudaSetDevice(option.gpu_id);
 
-  if (option.serialize_file != "") {
-    std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
-    if (fin) {
-      FDLogger() << "Detect serialized TensorRT Engine file in "
-                 << option.serialize_file << ", will load it directly."
-                 << std::endl;
-      fin.close();
-      return InitFromTrt(option.serialize_file);
-    }
-  }
-
   std::string onnx_content = "";
   if (!from_memory_buffer) {
     std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
@@ -167,6 +156,29 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
     onnx_content = model_file;
   }
 
+  // Record the original output order of the model here, because the
+  // converted TensorRT network may not preserve the order of outputs.
+  outputs_order_.clear();
+  auto onnx_reader =
+      paddle2onnx::OnnxReader(onnx_content.c_str(), onnx_content.size());
+  for (int i = 0; i < onnx_reader.NumOutputs(); ++i) {
+    std::string name(
+        onnx_reader.output_names[i],
+        onnx_reader.output_names[i] + strlen(onnx_reader.output_names[i]));
+    outputs_order_[name] = i;
+  }
+
+  if (option.serialize_file != "") {
+    std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
+    if (fin) {
+      FDLogger() << "Detect serialized TensorRT Engine file in "
+                 << option.serialize_file << ", will load it directly."
+                 << std::endl;
+      fin.close();
+      return InitFromTrt(option.serialize_file);
+    }
+  }
+
   if (!CreateTrtEngine(onnx_content, option)) {
     return false;
   }
@@ -251,13 +263,20 @@ void TrtBackend::AllocateBufferInDynamicShape(
   for (size_t i = 0; i < outputs_desc_.size(); ++i) {
     auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
     auto output_dims = context_->getBindingDimensions(idx);
-    (*outputs)[i].dtype = GetFDDataType(outputs_desc_[i].dtype);
-    (*outputs)[i].shape.assign(output_dims.d,
-                               output_dims.d + output_dims.nbDims);
-    (*outputs)[i].name = outputs_desc_[i].name;
-    (*outputs)[i].data.resize(volume(output_dims) *
-                              TrtDataTypeSize(outputs_desc_[i].dtype));
-    if ((*outputs)[i].Nbytes() >
+    // find the original index of output
+    auto iter = outputs_order_.find(outputs_desc_[i].name);
+    FDASSERT(iter != outputs_order_.end(),
+             "Cannot find output:" + outputs_desc_[i].name +
+                 " of tensorrt network from the original model.");
+    auto ori_idx = iter->second;
+    (*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
+    (*outputs)[ori_idx].shape.assign(output_dims.d,
+                                     output_dims.d + output_dims.nbDims);
+    (*outputs)[ori_idx].name = outputs_desc_[i].name;
+    (*outputs)[ori_idx].data.resize(volume(output_dims) *
+                                    TrtDataTypeSize(outputs_desc_[i].dtype));
+    if ((*outputs)[ori_idx].Nbytes() >
         outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
       outputs_buffer_[outputs_desc_[i].name].resize(output_dims);
       bindings_[idx] = outputs_buffer_[outputs_desc_[i].name].data();
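
Note that the serialized-engine shortcut (the option.serialize_file early return) is moved below the OnnxReader step, so outputs_order_ is populated even when a cached TensorRT engine is loaded directly. The following is a self-contained sketch of the recording loop, with plain C strings standing in for the names exposed by paddle2onnx::OnnxReader (an illustrative assumption; the real reader is constructed from the ONNX model buffer as shown above).

// Sketch: build the name -> original-index map from C-string output names.
#include <cstring>
#include <iostream>
#include <map>
#include <string>

int main() {
  // Stand-ins for onnx_reader.output_names and onnx_reader.NumOutputs().
  const char* output_names[] = {"scores", "boxes"};
  int num_outputs = 2;

  std::map<std::string, int> outputs_order;
  for (int i = 0; i < num_outputs; ++i) {
    // Same conversion as in the diff: construct a std::string from the
    // C string's begin and end pointers.
    std::string name(output_names[i],
                     output_names[i] + strlen(output_names[i]));
    outputs_order[name] = i;
  }

  for (const auto& kv : outputs_order) {
    std::cout << kv.first << " -> " << kv.second << std::endl;
  }
  return 0;
}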

@@ -28,8 +28,8 @@
#include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h" #include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
#include "fastdeploy/backends/tensorrt/common/sampleUtils.h" #include "fastdeploy/backends/tensorrt/common/sampleUtils.h"
#include "NvInfer.h"
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>
#include "NvInfer.h"
namespace fastdeploy { namespace fastdeploy {
using namespace samplesCommon; using namespace samplesCommon;
@@ -69,7 +69,7 @@ class TrtBackend : public BaseBackend {
   bool InitFromOnnx(const std::string& model_file,
                     const TrtBackendOption& option = TrtBackendOption(),
                     bool from_memory_buffer = false);
   bool InitFromTrt(const std::string& trt_engine_file,
                    const TrtBackendOption& option = TrtBackendOption());
 
   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
@@ -89,6 +89,13 @@ class TrtBackend : public BaseBackend {
   std::map<std::string, DeviceBuffer> inputs_buffer_;
   std::map<std::string, DeviceBuffer> outputs_buffer_;
 
+  // Sometimes, when the number of outputs is greater than 1, the output
+  // order of the TensorRT network may not be the same as that of the
+  // original ONNX model.
+  // This map records the original output order, so the right order can
+  // be recovered for the returned tensors.
+  std::map<std::string, int> outputs_order_;
+
   void GetInputOutputInfo();
   void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
                                     std::vector<FDTensor>* outputs);
@@ -96,4 +103,4 @@ class TrtBackend : public BaseBackend {
                        const TrtBackendOption& option);
 };
 
 }  // namespace fastdeploy

@@ -5,7 +5,7 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
 # add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
 # Specify the path to the downloaded and extracted FastDeploy library
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.0.3/)
+set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)
 include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)