Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-06 09:07:10 +08:00)
Fix outputs order of tensorrt (#18)
* fix trt output order
* Update trt_backend.cc
external/paddle2onnx.cmake (vendored), 2 changed lines:
@@ -43,7 +43,7 @@ else()
 endif(WIN32)

 set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/paddle2onnx/libs/")
-set(PADDLE2ONNX_VERSION "0.9.9")
+set(PADDLE2ONNX_VERSION "1.0.0rc1")
 if(WIN32)
   set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
 elseif(APPLE)
trt_backend.cc:

@@ -139,17 +139,6 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
   }
   cudaSetDevice(option.gpu_id);

-  if (option.serialize_file != "") {
-    std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
-    if (fin) {
-      FDLogger() << "Detect serialized TensorRT Engine file in "
-                 << option.serialize_file << ", will load it directly."
-                 << std::endl;
-      fin.close();
-      return InitFromTrt(option.serialize_file);
-    }
-  }
-
   std::string onnx_content = "";
   if (!from_memory_buffer) {
     std::ifstream fin(model_file.c_str(), std::ios::binary | std::ios::in);
@@ -167,6 +156,29 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
     onnx_content = model_file;
   }

+  // Record the original output order, because the converted
+  // TensorRT network may enumerate its outputs in a different order.
+  outputs_order_.clear();
+  auto onnx_reader =
+      paddle2onnx::OnnxReader(onnx_content.c_str(), onnx_content.size());
+  for (int i = 0; i < onnx_reader.NumOutputs(); ++i) {
+    std::string name(
+        onnx_reader.output_names[i],
+        onnx_reader.output_names[i] + strlen(onnx_reader.output_names[i]));
+    outputs_order_[name] = i;
+  }
+
+  if (option.serialize_file != "") {
+    std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
+    if (fin) {
+      FDLogger() << "Detect serialized TensorRT Engine file in "
+                 << option.serialize_file << ", will load it directly."
+                 << std::endl;
+      fin.close();
+      return InitFromTrt(option.serialize_file);
+    }
+  }
+
   if (!CreateTrtEngine(onnx_content, option)) {
     return false;
   }
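The second hunk above reads the ONNX model's output names and stores each name with its position before the engine is built, and before the early return that loads a cached serialized engine, so outputs_order_ is populated on either path. A minimal self-contained sketch of the same bookkeeping, with a plain vector of hypothetical output names standing in for the real paddle2onnx::OnnxReader:

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
      // Hypothetical output names, in the order the original ONNX model declares them.
      std::vector<std::string> onnx_output_names = {"scores", "boxes", "labels"};

      // Same bookkeeping as outputs_order_: output name -> original index.
      std::map<std::string, int> outputs_order;
      for (int i = 0; i < static_cast<int>(onnx_output_names.size()); ++i) {
        outputs_order[onnx_output_names[i]] = i;
      }

      // TensorRT may enumerate the same outputs in a different binding order;
      // looking each name up in the map recovers the index the caller expects.
      std::vector<std::string> trt_binding_order = {"boxes", "labels", "scores"};
      for (const auto& name : trt_binding_order) {
        std::cout << name << " -> original index " << outputs_order[name] << "\n";
      }
      return 0;
    }

Looking up a TensorRT binding name in this map gives back the index the caller expects, which is exactly what the next hunk does.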
trt_backend.cc (continued):

@@ -251,13 +263,20 @@ void TrtBackend::AllocateBufferInDynamicShape(
   for (size_t i = 0; i < outputs_desc_.size(); ++i) {
     auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
     auto output_dims = context_->getBindingDimensions(idx);
-    (*outputs)[i].dtype = GetFDDataType(outputs_desc_[i].dtype);
-    (*outputs)[i].shape.assign(output_dims.d,
+
+    // find the original index of output
+    auto iter = outputs_order_.find(outputs_desc_[i].name);
+    FDASSERT(iter != outputs_order_.end(),
+             "Cannot find output:" + outputs_desc_[i].name +
+                 " of tensorrt network from the original model.");
+    auto ori_idx = iter->second;
+    (*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
+    (*outputs)[ori_idx].shape.assign(output_dims.d,
                                      output_dims.d + output_dims.nbDims);
-    (*outputs)[i].name = outputs_desc_[i].name;
-    (*outputs)[i].data.resize(volume(output_dims) *
+    (*outputs)[ori_idx].name = outputs_desc_[i].name;
+    (*outputs)[ori_idx].data.resize(volume(output_dims) *
                               TrtDataTypeSize(outputs_desc_[i].dtype));
-    if ((*outputs)[i].Nbytes() >
+    if ((*outputs)[ori_idx].Nbytes() >
         outputs_buffer_[outputs_desc_[i].name].nbBytes()) {
       outputs_buffer_[outputs_desc_[i].name].resize(output_dims);
       bindings_[idx] = outputs_buffer_[outputs_desc_[i].name].data();
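The switch from (*outputs)[i] to (*outputs)[ori_idx] means each tensor is written to the slot the original model declared for it, while the device buffer and binding keep using TensorRT's own index idx. A short sketch of that reordering step in isolation, assuming a simplified stand-in struct instead of the real FDTensor:

    #include <cassert>
    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    // Hypothetical stand-in for FDTensor, reduced to the fields needed here.
    struct Tensor {
      std::string name;
      std::vector<int64_t> shape;
    };

    // Copy tensors produced in TensorRT binding order into the slots given by
    // the original model's output order (the role of outputs_order_ in the diff).
    std::vector<Tensor> ReorderToOriginal(
        const std::vector<Tensor>& trt_outputs,
        const std::map<std::string, int>& outputs_order) {
      std::vector<Tensor> reordered(trt_outputs.size());
      for (const auto& t : trt_outputs) {
        auto iter = outputs_order.find(t.name);
        assert(iter != outputs_order.end());  // every TRT output exists in the model
        reordered[iter->second] = t;
      }
      return reordered;
    }

    int main() {
      // Hypothetical model order: scores first, boxes second.
      std::map<std::string, int> outputs_order = {{"scores", 0}, {"boxes", 1}};
      // TensorRT happened to bind them the other way around.
      std::vector<Tensor> trt_outputs = {{"boxes", {1, 4}}, {"scores", {1, 80}}};

      std::vector<Tensor> fixed = ReorderToOriginal(trt_outputs, outputs_order);
      std::cout << fixed[0].name << ", " << fixed[1].name << "\n";  // prints: scores, boxes
      return 0;
    }

Keying the map by tensor name rather than by binding index keeps the mapping independent of how TensorRT happens to number its bindings.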
trt_backend.h:

@@ -28,8 +28,8 @@
 #include "fastdeploy/backends/tensorrt/common/parserOnnxConfig.h"
 #include "fastdeploy/backends/tensorrt/common/sampleUtils.h"

-#include "NvInfer.h"
 #include <cuda_runtime_api.h>
+#include "NvInfer.h"

 namespace fastdeploy {
 using namespace samplesCommon;
@@ -89,6 +89,13 @@ class TrtBackend : public BaseBackend {
   std::map<std::string, DeviceBuffer> inputs_buffer_;
   std::map<std::string, DeviceBuffer> outputs_buffer_;

+  // Sometimes, when the number of outputs > 1,
+  // the output order of the TensorRT network
+  // may not match the original ONNX model.
+  // This map records the original output
+  // order, to help recover the right order.
+  std::map<std::string, int> outputs_order_;
+
   void GetInputOutputInfo();
   void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
                                     std::vector<FDTensor>* outputs);
example CMakeLists.txt:

@@ -5,7 +5,7 @@ CMAKE_MINIMUM_REQUIRED (VERSION 3.16)
 # add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

 # Specify the path of the downloaded and extracted fastdeploy library
-set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.0.3/)
+set(FASTDEPLOY_INSTALL_DIR ${PROJECT_SOURCE_DIR}/fastdeploy-linux-x64-0.3.0/)

 include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)