mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-07 01:22:59 +08:00
Optimize TensorRT backend to support rebuild engine (#189)
* optimize tensorrt usage * format code * fix input shape error for onnx model Co-authored-by: root <root@bjyz-sys-gpu-kongming3.bjyz.baidu.com>
This commit is contained in:
@@ -13,9 +13,9 @@
|
|||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "fastdeploy/backends/tensorrt/trt_backend.h"
|
#include "fastdeploy/backends/tensorrt/trt_backend.h"
|
||||||
#include <cstring>
|
|
||||||
#include "NvInferSafeRuntime.h"
|
#include "NvInferSafeRuntime.h"
|
||||||
#include "fastdeploy/utils/utils.h"
|
#include "fastdeploy/utils/utils.h"
|
||||||
|
#include <cstring>
|
||||||
#ifdef ENABLE_PADDLE_FRONTEND
|
#ifdef ENABLE_PADDLE_FRONTEND
|
||||||
#include "paddle2onnx/converter.h"
|
#include "paddle2onnx/converter.h"
|
||||||
#endif
|
#endif
|
||||||
@@ -24,117 +24,46 @@ namespace fastdeploy {
|
|||||||
|
|
||||||
FDTrtLogger* FDTrtLogger::logger = nullptr;
|
FDTrtLogger* FDTrtLogger::logger = nullptr;
|
||||||
|
|
||||||
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) {
|
// Check if the model can build tensorrt engine now
|
||||||
if (dtype == nvinfer1::DataType::kFLOAT) {
|
// If the model has dynamic input shape, it will require defined shape
|
||||||
return sizeof(float);
|
// information We can set the shape range information by function
|
||||||
} else if (dtype == nvinfer1::DataType::kHALF) {
|
// SetTrtInputShape() But if the shape range is not defined, then the engine
|
||||||
return sizeof(float) / 2;
|
// cannot build, in this case, The engine will build once there's data feeded,
|
||||||
} else if (dtype == nvinfer1::DataType::kINT8) {
|
// and the shape range will be updated
|
||||||
return sizeof(int8_t);
|
bool CanBuildEngine(
|
||||||
} else if (dtype == nvinfer1::DataType::kINT32) {
|
const std::map<std::string, ShapeRangeInfo>& shape_range_info) {
|
||||||
return sizeof(int32_t);
|
for (auto iter = shape_range_info.begin(); iter != shape_range_info.end();
|
||||||
}
|
++iter) {
|
||||||
// kBOOL
|
bool is_full_static = true;
|
||||||
return sizeof(bool);
|
for (size_t i = 0; i < iter->second.shape.size(); ++i) {
|
||||||
}
|
if (iter->second.shape[i] < 0) {
|
||||||
|
is_full_static = false;
|
||||||
FDDataType GetFDDataType(const nvinfer1::DataType& dtype) {
|
break;
|
||||||
if (dtype == nvinfer1::DataType::kFLOAT) {
|
|
||||||
return FDDataType::FP32;
|
|
||||||
} else if (dtype == nvinfer1::DataType::kHALF) {
|
|
||||||
return FDDataType::FP16;
|
|
||||||
} else if (dtype == nvinfer1::DataType::kINT8) {
|
|
||||||
return FDDataType::INT8;
|
|
||||||
} else if (dtype == nvinfer1::DataType::kINT32) {
|
|
||||||
return FDDataType::INT32;
|
|
||||||
}
|
|
||||||
// kBOOL
|
|
||||||
return FDDataType::BOOL;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<int> toVec(const nvinfer1::Dims& dim) {
|
|
||||||
std::vector<int> out(dim.d, dim.d + dim.nbDims);
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool CheckDynamicShapeConfig(const paddle2onnx::OnnxReader& reader,
|
|
||||||
const TrtBackendOption& option) {
|
|
||||||
// paddle2onnx::ModelTensorInfo inputs[reader.NumInputs()];
|
|
||||||
// std::string input_shapes[reader.NumInputs()];
|
|
||||||
std::vector<paddle2onnx::ModelTensorInfo> inputs(reader.NumInputs());
|
|
||||||
std::vector<std::string> input_shapes(reader.NumInputs());
|
|
||||||
for (int i = 0; i < reader.NumInputs(); ++i) {
|
|
||||||
reader.GetInputInfo(i, &inputs[i]);
|
|
||||||
|
|
||||||
// change 0 to -1, when input_dim is a string, onnx will make it to zero
|
|
||||||
for (int j = 0; j < inputs[i].rank; ++j) {
|
|
||||||
if (inputs[i].shape[j] <= 0) {
|
|
||||||
inputs[i].shape[j] = -1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
input_shapes[i] = "";
|
if (is_full_static) {
|
||||||
for (int j = 0; j < inputs[i].rank; ++j) {
|
continue;
|
||||||
if (j != inputs[i].rank - 1) {
|
}
|
||||||
input_shapes[i] += (std::to_string(inputs[i].shape[j]) + ", ");
|
for (size_t i = 0; i < iter->second.shape.size(); ++i) {
|
||||||
} else {
|
if (iter->second.min[i] < 0 || iter->second.max[i] < 0) {
|
||||||
input_shapes[i] += std::to_string(inputs[i].shape[j]);
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return true;
|
||||||
bool all_check_passed = true;
|
|
||||||
for (int i = 0; i < reader.NumInputs(); ++i) {
|
|
||||||
bool contain_unknown_dim = false;
|
|
||||||
for (int j = 0; j < inputs[i].rank; ++j) {
|
|
||||||
if (inputs[i].shape[j] < 0) {
|
|
||||||
contain_unknown_dim = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string name(inputs[i].name, strlen(inputs[i].name));
|
|
||||||
FDINFO << "The loaded model's input tensor:" << name
|
|
||||||
<< " has shape [" + input_shapes[i] << "]." << std::endl;
|
|
||||||
if (contain_unknown_dim) {
|
|
||||||
auto iter1 = option.min_shape.find(name);
|
|
||||||
auto iter2 = option.max_shape.find(name);
|
|
||||||
auto iter3 = option.opt_shape.find(name);
|
|
||||||
if (iter1 == option.min_shape.end() || iter2 == option.max_shape.end() ||
|
|
||||||
iter3 == option.opt_shape.end()) {
|
|
||||||
FDERROR << "The loaded model's input tensor:" << name
|
|
||||||
<< " has dynamic shape [" + input_shapes[i] +
|
|
||||||
"], but didn't configure it's shape for tensorrt with "
|
|
||||||
"SetTrtInputShape correctly."
|
|
||||||
<< std::endl;
|
|
||||||
all_check_passed = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return all_check_passed;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
|
bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
|
||||||
const TrtBackendOption& option) {
|
cudaSetDevice(option_.gpu_id);
|
||||||
if (initialized_) {
|
|
||||||
FDERROR << "TrtBackend is already initlized, cannot initialize again."
|
|
||||||
<< std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
cudaSetDevice(option.gpu_id);
|
|
||||||
|
|
||||||
std::ifstream fin(trt_engine_file, std::ios::binary | std::ios::in);
|
|
||||||
if (!fin) {
|
|
||||||
FDERROR << "Failed to open TensorRT Engine file " << trt_engine_file
|
|
||||||
<< std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
fin.seekg(0, std::ios::end);
|
|
||||||
std::string engine_buffer;
|
std::string engine_buffer;
|
||||||
engine_buffer.resize(fin.tellg());
|
if (!ReadBinaryFromFile(trt_engine_file, &engine_buffer)) {
|
||||||
fin.seekg(0, std::ios::beg);
|
FDERROR << "Failed to load TensorRT Engine from " << trt_engine_file << "."
|
||||||
fin.read(&(engine_buffer.at(0)), engine_buffer.size());
|
<< std::endl;
|
||||||
fin.close();
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
FDUniquePtr<nvinfer1::IRuntime> runtime{
|
FDUniquePtr<nvinfer1::IRuntime> runtime{
|
||||||
nvinfer1::createInferRuntime(*FDTrtLogger::Get())};
|
nvinfer1::createInferRuntime(*FDTrtLogger::Get())};
|
||||||
if (!runtime) {
|
if (!runtime) {
|
||||||
@@ -152,10 +81,31 @@ bool TrtBackend::InitFromTrt(const std::string& trt_engine_file,
|
|||||||
|
|
||||||
context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
|
context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
|
||||||
engine_->createExecutionContext());
|
engine_->createExecutionContext());
|
||||||
FDASSERT(cudaStreamCreate(&stream_) == 0,
|
|
||||||
"[ERROR] Error occurs while calling cudaStreamCreate().");
|
|
||||||
GetInputOutputInfo();
|
GetInputOutputInfo();
|
||||||
initialized_ = true;
|
|
||||||
|
for (int32_t i = 0; i < engine_->getNbBindings(); ++i) {
|
||||||
|
if (!engine_->bindingIsInput(i)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto min = ToVec(engine_->getProfileDimensions(
|
||||||
|
i, 0, nvinfer1::OptProfileSelector::kMAX));
|
||||||
|
auto max = ToVec(engine_->getProfileDimensions(
|
||||||
|
i, 0, nvinfer1::OptProfileSelector::kMIN));
|
||||||
|
auto name = std::string(engine_->getBindingName(i));
|
||||||
|
auto iter = shape_range_info_.find(name);
|
||||||
|
if (iter == shape_range_info_.end()) {
|
||||||
|
FDERROR << "There's no input named '" << name << "' in loaded model."
|
||||||
|
<< std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
iter->second.Update(min);
|
||||||
|
iter->second.Update(max);
|
||||||
|
}
|
||||||
|
FDINFO << "Build TensorRT Engine from cache file: " << trt_engine_file
|
||||||
|
<< " with shape range information as below," << std::endl;
|
||||||
|
for (const auto& item : shape_range_info_) {
|
||||||
|
FDINFO << item.second << std::endl;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -167,10 +117,11 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
option_ = option;
|
||||||
|
|
||||||
#ifdef ENABLE_PADDLE_FRONTEND
|
#ifdef ENABLE_PADDLE_FRONTEND
|
||||||
std::vector<paddle2onnx::CustomOp> custom_ops;
|
std::vector<paddle2onnx::CustomOp> custom_ops;
|
||||||
for (auto& item : option.custom_op_info_) {
|
for (auto& item : option_.custom_op_info_) {
|
||||||
paddle2onnx::CustomOp op;
|
paddle2onnx::CustomOp op;
|
||||||
std::strcpy(op.op_name, item.first.c_str());
|
std::strcpy(op.op_name, item.first.c_str());
|
||||||
std::strcpy(op.export_op_name, item.second.c_str());
|
std::strcpy(op.export_op_name, item.second.c_str());
|
||||||
@@ -187,7 +138,7 @@ bool TrtBackend::InitFromPaddle(const std::string& model_file,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (option.remove_multiclass_nms_) {
|
if (option_.remove_multiclass_nms_) {
|
||||||
char* new_model = nullptr;
|
char* new_model = nullptr;
|
||||||
int new_model_size = 0;
|
int new_model_size = 0;
|
||||||
if (!paddle2onnx::RemoveMultiClassNMS(model_content_ptr, model_content_size,
|
if (!paddle2onnx::RemoveMultiClassNMS(model_content_ptr, model_content_size,
|
||||||
@@ -222,7 +173,8 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
cudaSetDevice(option.gpu_id);
|
option_ = option;
|
||||||
|
cudaSetDevice(option_.gpu_id);
|
||||||
|
|
||||||
std::string onnx_content = "";
|
std::string onnx_content = "";
|
||||||
if (!from_memory_buffer) {
|
if (!from_memory_buffer) {
|
||||||
@@ -246,43 +198,94 @@ bool TrtBackend::InitFromOnnx(const std::string& model_file,
|
|||||||
outputs_order_.clear();
|
outputs_order_.clear();
|
||||||
auto onnx_reader =
|
auto onnx_reader =
|
||||||
paddle2onnx::OnnxReader(onnx_content.c_str(), onnx_content.size());
|
paddle2onnx::OnnxReader(onnx_content.c_str(), onnx_content.size());
|
||||||
for (int i = 0; i < onnx_reader.NumOutputs(); ++i) {
|
for (int i = 0; i < onnx_reader.num_outputs; ++i) {
|
||||||
std::string name(
|
std::string name(onnx_reader.outputs[i].name);
|
||||||
onnx_reader.output_names[i],
|
|
||||||
onnx_reader.output_names[i] + strlen(onnx_reader.output_names[i]));
|
|
||||||
outputs_order_[name] = i;
|
outputs_order_[name] = i;
|
||||||
}
|
}
|
||||||
if (!CheckDynamicShapeConfig(onnx_reader, option)) {
|
|
||||||
FDERROR << "TrtBackend::CheckDynamicShapeConfig failed." << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (option.serialize_file != "") {
|
shape_range_info_.clear();
|
||||||
std::ifstream fin(option.serialize_file, std::ios::binary | std::ios::in);
|
inputs_desc_.clear();
|
||||||
if (fin) {
|
outputs_desc_.clear();
|
||||||
FDINFO << "Detect serialized TensorRT Engine file in "
|
inputs_desc_.resize(onnx_reader.num_inputs);
|
||||||
<< option.serialize_file << ", will load it directly."
|
outputs_desc_.resize(onnx_reader.num_outputs);
|
||||||
<< std::endl;
|
for (int i = 0; i < onnx_reader.num_inputs; ++i) {
|
||||||
fin.close();
|
std::string name(onnx_reader.inputs[i].name);
|
||||||
return InitFromTrt(option.serialize_file, option);
|
std::vector<int64_t> shape(onnx_reader.inputs[i].shape,
|
||||||
|
onnx_reader.inputs[i].shape +
|
||||||
|
onnx_reader.inputs[i].rank);
|
||||||
|
inputs_desc_[i].name = name;
|
||||||
|
inputs_desc_[i].shape.assign(shape.begin(), shape.end());
|
||||||
|
inputs_desc_[i].dtype = ReaderDtypeToTrtDtype(onnx_reader.inputs[i].dtype);
|
||||||
|
auto info = ShapeRangeInfo(shape);
|
||||||
|
info.name = name;
|
||||||
|
auto iter_min = option.min_shape.find(name);
|
||||||
|
auto iter_max = option.max_shape.find(name);
|
||||||
|
auto iter_opt = option.opt_shape.find(name);
|
||||||
|
if (iter_min != option.min_shape.end()) {
|
||||||
|
info.min.assign(iter_min->second.begin(), iter_min->second.end());
|
||||||
|
info.max.assign(iter_max->second.begin(), iter_max->second.end());
|
||||||
|
info.opt.assign(iter_opt->second.begin(), iter_opt->second.end());
|
||||||
}
|
}
|
||||||
|
shape_range_info_.insert(std::make_pair(name, info));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!CreateTrtEngine(onnx_content, option)) {
|
for (int i = 0; i < onnx_reader.num_outputs; ++i) {
|
||||||
return false;
|
std::string name(onnx_reader.outputs[i].name);
|
||||||
|
std::vector<int64_t> shape(onnx_reader.outputs[i].shape,
|
||||||
|
onnx_reader.outputs[i].shape +
|
||||||
|
onnx_reader.outputs[i].rank);
|
||||||
|
outputs_desc_[i].name = name;
|
||||||
|
outputs_desc_[i].shape.assign(shape.begin(), shape.end());
|
||||||
|
outputs_desc_[i].dtype =
|
||||||
|
ReaderDtypeToTrtDtype(onnx_reader.outputs[i].dtype);
|
||||||
}
|
}
|
||||||
|
|
||||||
context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
|
|
||||||
engine_->createExecutionContext());
|
|
||||||
FDASSERT(cudaStreamCreate(&stream_) == 0,
|
FDASSERT(cudaStreamCreate(&stream_) == 0,
|
||||||
"[ERROR] Error occurs while calling cudaStreamCreate().");
|
"[ERROR] Error occurs while calling cudaStreamCreate().");
|
||||||
GetInputOutputInfo();
|
|
||||||
|
if (!CreateTrtEngineFromOnnx(onnx_content)) {
|
||||||
|
FDERROR << "Failed to create tensorrt engine." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
initialized_ = true;
|
initialized_ = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int TrtBackend::ShapeRangeInfoUpdated(const std::vector<FDTensor>& inputs) {
|
||||||
|
bool need_update_engine = false;
|
||||||
|
for (size_t i = 0; i < inputs.size(); ++i) {
|
||||||
|
auto iter = shape_range_info_.find(inputs[i].name);
|
||||||
|
if (iter == shape_range_info_.end()) {
|
||||||
|
FDERROR << "There's no input named '" << inputs[i].name
|
||||||
|
<< "' in loaded model." << std::endl;
|
||||||
|
}
|
||||||
|
if (iter->second.Update(inputs[i].shape) == 1) {
|
||||||
|
need_update_engine = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return need_update_engine;
|
||||||
|
}
|
||||||
|
|
||||||
bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
|
bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
|
||||||
std::vector<FDTensor>* outputs) {
|
std::vector<FDTensor>* outputs) {
|
||||||
|
if (inputs.size() != NumInputs()) {
|
||||||
|
FDERROR << "Require " << NumInputs() << "inputs, but get " << inputs.size()
|
||||||
|
<< "." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (ShapeRangeInfoUpdated(inputs)) {
|
||||||
|
// meet new shape output of predefined max/min shape
|
||||||
|
// rebuild the tensorrt engine
|
||||||
|
FDWARNING
|
||||||
|
<< "TensorRT engine will be rebuilt once shape range information "
|
||||||
|
"changed, this may take lots of time, you can set a proper shape "
|
||||||
|
"range before loading model to avoid rebuilding process. refer "
|
||||||
|
"https://github.com/PaddlePaddle/FastDeploy/docs/backends/"
|
||||||
|
"tensorrt.md for more details."
|
||||||
|
<< std::endl;
|
||||||
|
BuildTrtEngine();
|
||||||
|
}
|
||||||
|
|
||||||
AllocateBufferInDynamicShape(inputs, outputs);
|
AllocateBufferInDynamicShape(inputs, outputs);
|
||||||
std::vector<void*> input_binds(inputs.size());
|
std::vector<void*> input_binds(inputs.size());
|
||||||
for (size_t i = 0; i < inputs.size(); ++i) {
|
for (size_t i = 0; i < inputs.size(); ++i) {
|
||||||
@@ -316,12 +319,14 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void TrtBackend::GetInputOutputInfo() {
|
void TrtBackend::GetInputOutputInfo() {
|
||||||
|
std::vector<TrtValueInfo>().swap(inputs_desc_);
|
||||||
|
std::vector<TrtValueInfo>().swap(outputs_desc_);
|
||||||
inputs_desc_.clear();
|
inputs_desc_.clear();
|
||||||
outputs_desc_.clear();
|
outputs_desc_.clear();
|
||||||
auto num_binds = engine_->getNbBindings();
|
auto num_binds = engine_->getNbBindings();
|
||||||
for (auto i = 0; i < num_binds; ++i) {
|
for (auto i = 0; i < num_binds; ++i) {
|
||||||
std::string name = std::string(engine_->getBindingName(i));
|
std::string name = std::string(engine_->getBindingName(i));
|
||||||
auto shape = toVec(engine_->getBindingDimensions(i));
|
auto shape = ToVec(engine_->getBindingDimensions(i));
|
||||||
auto dtype = engine_->getBindingDataType(i);
|
auto dtype = engine_->getBindingDataType(i);
|
||||||
if (engine_->bindingIsInput(i)) {
|
if (engine_->bindingIsInput(i)) {
|
||||||
inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
|
inputs_desc_.emplace_back(TrtValueInfo{name, shape, dtype});
|
||||||
@@ -355,8 +360,10 @@ void TrtBackend::AllocateBufferInDynamicShape(
|
|||||||
|
|
||||||
// find the original index of output
|
// find the original index of output
|
||||||
auto iter = outputs_order_.find(outputs_desc_[i].name);
|
auto iter = outputs_order_.find(outputs_desc_[i].name);
|
||||||
FDASSERT(iter != outputs_order_.end(),
|
FDASSERT(
|
||||||
"Cannot find output: %s of tensorrt network from the original model.", outputs_desc_[i].name.c_str());
|
iter != outputs_order_.end(),
|
||||||
|
"Cannot find output: %s of tensorrt network from the original model.",
|
||||||
|
outputs_desc_[i].name.c_str());
|
||||||
auto ori_idx = iter->second;
|
auto ori_idx = iter->second;
|
||||||
(*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
|
(*outputs)[ori_idx].dtype = GetFDDataType(outputs_desc_[i].dtype);
|
||||||
(*outputs)[ori_idx].shape.assign(output_dims.d,
|
(*outputs)[ori_idx].shape.assign(output_dims.d,
|
||||||
@@ -372,32 +379,15 @@ void TrtBackend::AllocateBufferInDynamicShape(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
|
bool TrtBackend::BuildTrtEngine() {
|
||||||
const TrtBackendOption& option) {
|
auto config =
|
||||||
const auto explicitBatch =
|
FDUniquePtr<nvinfer1::IBuilderConfig>(builder_->createBuilderConfig());
|
||||||
1U << static_cast<uint32_t>(
|
|
||||||
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
|
|
||||||
|
|
||||||
builder_ = FDUniquePtr<nvinfer1::IBuilder>(
|
|
||||||
nvinfer1::createInferBuilder(*FDTrtLogger::Get()));
|
|
||||||
if (!builder_) {
|
|
||||||
FDERROR << "Failed to call createInferBuilder()." << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
network_ = FDUniquePtr<nvinfer1::INetworkDefinition>(
|
|
||||||
builder_->createNetworkV2(explicitBatch));
|
|
||||||
if (!network_) {
|
|
||||||
FDERROR << "Failed to call createNetworkV2()." << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
auto config = FDUniquePtr<nvinfer1::IBuilderConfig>(
|
|
||||||
builder_->createBuilderConfig());
|
|
||||||
if (!config) {
|
if (!config) {
|
||||||
FDERROR << "Failed to call createBuilderConfig()." << std::endl;
|
FDERROR << "Failed to call createBuilderConfig()." << std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (option.enable_fp16) {
|
if (option_.enable_fp16) {
|
||||||
if (!builder_->platformHasFastFp16()) {
|
if (!builder_->platformHasFastFp16()) {
|
||||||
FDWARNING << "Detected FP16 is not supported in the current GPU, "
|
FDWARNING << "Detected FP16 is not supported in the current GPU, "
|
||||||
"will use FP32 instead."
|
"will use FP32 instead."
|
||||||
@@ -407,56 +397,52 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
parser_ = FDUniquePtr<nvonnxparser::IParser>(
|
|
||||||
nvonnxparser::createParser(*network_, *FDTrtLogger::Get()));
|
|
||||||
if (!parser_) {
|
|
||||||
FDERROR << "Failed to call createParser()." << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!parser_->parse(onnx_model.data(), onnx_model.size())) {
|
|
||||||
FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
FDINFO << "Start to building TensorRT Engine..." << std::endl;
|
FDINFO << "Start to building TensorRT Engine..." << std::endl;
|
||||||
bool fp16 = builder_->platformHasFastFp16();
|
|
||||||
builder_->setMaxBatchSize(option.max_batch_size);
|
|
||||||
|
|
||||||
config->setMaxWorkspaceSize(option.max_workspace_size);
|
if (context_) {
|
||||||
|
context_.reset();
|
||||||
if (option.max_shape.size() > 0) {
|
engine_.reset();
|
||||||
auto profile = builder_->createOptimizationProfile();
|
|
||||||
FDASSERT(option.max_shape.size() == option.min_shape.size() &&
|
|
||||||
option.min_shape.size() == option.opt_shape.size(),
|
|
||||||
"[TrtBackend] Size of max_shape/opt_shape/min_shape in "
|
|
||||||
"TrtBackendOption should keep same.");
|
|
||||||
for (const auto& item : option.min_shape) {
|
|
||||||
// set min shape
|
|
||||||
FDASSERT(profile->setDimensions(item.first.c_str(),
|
|
||||||
nvinfer1::OptProfileSelector::kMIN,
|
|
||||||
ToDims(item.second)),
|
|
||||||
"[TrtBackend] Failed to set min_shape for input: %s in TrtBackend.", item.first.c_str());
|
|
||||||
|
|
||||||
// set optimization shape
|
|
||||||
auto iter = option.opt_shape.find(item.first);
|
|
||||||
FDASSERT(iter != option.opt_shape.end(),
|
|
||||||
"[TrtBackend] Cannot find input name: %s in TrtBackendOption::opt_shape.", item.first.c_str());
|
|
||||||
FDASSERT(profile->setDimensions(item.first.c_str(),
|
|
||||||
nvinfer1::OptProfileSelector::kOPT,
|
|
||||||
ToDims(iter->second)),
|
|
||||||
"[TrtBackend] Failed to set opt_shape for input: %s in TrtBackend.", item.first.c_str());
|
|
||||||
// set max shape
|
|
||||||
iter = option.max_shape.find(item.first);
|
|
||||||
FDASSERT(iter != option.max_shape.end(),
|
|
||||||
"[TrtBackend] Cannot find input name: %s in TrtBackendOption::max_shape.", item.first);
|
|
||||||
FDASSERT(profile->setDimensions(item.first.c_str(),
|
|
||||||
nvinfer1::OptProfileSelector::kMAX,
|
|
||||||
ToDims(iter->second)),
|
|
||||||
"[TrtBackend] Failed to set max_shape for input: %s in TrtBackend.", item.first);
|
|
||||||
}
|
|
||||||
config->addOptimizationProfile(profile);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
builder_->setMaxBatchSize(option_.max_batch_size);
|
||||||
|
config->setMaxWorkspaceSize(option_.max_workspace_size);
|
||||||
|
auto profile = builder_->createOptimizationProfile();
|
||||||
|
for (const auto& item : shape_range_info_) {
|
||||||
|
FDASSERT(
|
||||||
|
profile->setDimensions(item.first.c_str(),
|
||||||
|
nvinfer1::OptProfileSelector::kMIN,
|
||||||
|
ToDims(item.second.min)),
|
||||||
|
"[TrtBackend] Failed to set min_shape for input: %s in TrtBackend.",
|
||||||
|
item.first.c_str());
|
||||||
|
FDASSERT(
|
||||||
|
profile->setDimensions(item.first.c_str(),
|
||||||
|
nvinfer1::OptProfileSelector::kMAX,
|
||||||
|
ToDims(item.second.max)),
|
||||||
|
"[TrtBackend] Failed to set max_shape for input: %s in TrtBackend.",
|
||||||
|
item.first.c_str());
|
||||||
|
if (item.second.opt.size() == 0) {
|
||||||
|
FDASSERT(
|
||||||
|
profile->setDimensions(item.first.c_str(),
|
||||||
|
nvinfer1::OptProfileSelector::kOPT,
|
||||||
|
ToDims(item.second.max)),
|
||||||
|
"[TrtBackend] Failed to set opt_shape for input: %s in TrtBackend.",
|
||||||
|
item.first.c_str());
|
||||||
|
} else {
|
||||||
|
FDASSERT(
|
||||||
|
item.second.opt.size() == item.second.shape.size(),
|
||||||
|
"Require the dimension of opt in shape range information equal to "
|
||||||
|
"dimension of input: %s in this model, but now it's %zu != %zu.",
|
||||||
|
item.first.c_str(), item.second.opt.size(), item.second.shape.size());
|
||||||
|
FDASSERT(
|
||||||
|
profile->setDimensions(item.first.c_str(),
|
||||||
|
nvinfer1::OptProfileSelector::kOPT,
|
||||||
|
ToDims(item.second.opt)),
|
||||||
|
"[TrtBackend] Failed to set opt_shape for input: %s in TrtBackend.",
|
||||||
|
item.first.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
config->addOptimizationProfile(profile);
|
||||||
|
|
||||||
FDUniquePtr<nvinfer1::IHostMemory> plan{
|
FDUniquePtr<nvinfer1::IHostMemory> plan{
|
||||||
builder_->buildSerializedNetwork(*network_, *config)};
|
builder_->buildSerializedNetwork(*network_, *config)};
|
||||||
if (!plan) {
|
if (!plan) {
|
||||||
@@ -479,20 +465,24 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
|
||||||
|
engine_->createExecutionContext());
|
||||||
|
GetInputOutputInfo();
|
||||||
|
|
||||||
FDINFO << "TensorRT Engine is built succussfully." << std::endl;
|
FDINFO << "TensorRT Engine is built succussfully." << std::endl;
|
||||||
if (option.serialize_file != "") {
|
if (option_.serialize_file != "") {
|
||||||
FDINFO << "Serialize TensorRTEngine to local file " << option.serialize_file
|
FDINFO << "Serialize TensorRTEngine to local file "
|
||||||
<< "." << std::endl;
|
<< option_.serialize_file << "." << std::endl;
|
||||||
std::ofstream engine_file(option.serialize_file.c_str());
|
std::ofstream engine_file(option_.serialize_file.c_str());
|
||||||
if (!engine_file) {
|
if (!engine_file) {
|
||||||
FDERROR << "Failed to open " << option.serialize_file << " to write."
|
FDERROR << "Failed to open " << option_.serialize_file << " to write."
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
engine_file.write(static_cast<char*>(plan->data()), plan->size());
|
engine_file.write(static_cast<char*>(plan->data()), plan->size());
|
||||||
engine_file.close();
|
engine_file.close();
|
||||||
FDINFO << "TensorRTEngine is serialized to local file "
|
FDINFO << "TensorRTEngine is serialized to local file "
|
||||||
<< option.serialize_file
|
<< option_.serialize_file
|
||||||
<< ", we can load this model from the seralized engine "
|
<< ", we can load this model from the seralized engine "
|
||||||
"directly next time."
|
"directly next time."
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
@@ -500,8 +490,81 @@ bool TrtBackend::CreateTrtEngine(const std::string& onnx_model,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TrtBackend::CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer) {
|
||||||
|
const auto explicitBatch =
|
||||||
|
1U << static_cast<uint32_t>(
|
||||||
|
nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
|
||||||
|
|
||||||
|
builder_ = FDUniquePtr<nvinfer1::IBuilder>(
|
||||||
|
nvinfer1::createInferBuilder(*FDTrtLogger::Get()));
|
||||||
|
if (!builder_) {
|
||||||
|
FDERROR << "Failed to call createInferBuilder()." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
network_ = FDUniquePtr<nvinfer1::INetworkDefinition>(
|
||||||
|
builder_->createNetworkV2(explicitBatch));
|
||||||
|
if (!network_) {
|
||||||
|
FDERROR << "Failed to call createNetworkV2()." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
parser_ = FDUniquePtr<nvonnxparser::IParser>(
|
||||||
|
nvonnxparser::createParser(*network_, *FDTrtLogger::Get()));
|
||||||
|
if (!parser_) {
|
||||||
|
FDERROR << "Failed to call createParser()." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!parser_->parse(onnx_model_buffer.data(), onnx_model_buffer.size())) {
|
||||||
|
FDERROR << "Failed to parse ONNX model by TensorRT." << std::endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (option_.serialize_file != "") {
|
||||||
|
std::ifstream fin(option_.serialize_file, std::ios::binary | std::ios::in);
|
||||||
|
if (fin) {
|
||||||
|
FDINFO << "Detect serialized TensorRT Engine file in "
|
||||||
|
<< option_.serialize_file << ", will load it directly."
|
||||||
|
<< std::endl;
|
||||||
|
fin.close();
|
||||||
|
// clear memory buffer of the temporary member
|
||||||
|
std::string().swap(onnx_model_buffer_);
|
||||||
|
return LoadTrtCache(option_.serialize_file);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!CanBuildEngine(shape_range_info_)) {
|
||||||
|
onnx_model_buffer_ = onnx_model_buffer;
|
||||||
|
FDWARNING << "Cannot build engine right now, because there's dynamic input "
|
||||||
|
"shape exists, list as below,"
|
||||||
|
<< std::endl;
|
||||||
|
for (int i = 0; i < NumInputs(); ++i) {
|
||||||
|
FDWARNING << "Input " << i << ": " << GetInputInfo(i) << std::endl;
|
||||||
|
}
|
||||||
|
FDWARNING
|
||||||
|
<< "FastDeploy will build the engine while inference with input data, "
|
||||||
|
"and will also collect the input shape range information. You "
|
||||||
|
"should be noticed that FastDeploy will rebuild the engine while "
|
||||||
|
"new input shape is out of the collected shape range, this may "
|
||||||
|
"bring some time consuming problem, refer "
|
||||||
|
"https://github.com/PaddlePaddle/FastDeploy/docs/backends/"
|
||||||
|
"tensorrt.md for more details."
|
||||||
|
<< std::endl;
|
||||||
|
initialized_ = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!BuildTrtEngine()) {
|
||||||
|
FDERROR << "Failed to build tensorrt engine." << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// clear memory buffer of the temporary member
|
||||||
|
std::string().swap(onnx_model_buffer_);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
TensorInfo TrtBackend::GetInputInfo(int index) {
|
TensorInfo TrtBackend::GetInputInfo(int index) {
|
||||||
FDASSERT(index < NumInputs(), "The index: %d should less than the number of inputs: %d.", index, NumInputs());
|
FDASSERT(index < NumInputs(),
|
||||||
|
"The index: %d should less than the number of inputs: %d.", index,
|
||||||
|
NumInputs());
|
||||||
TensorInfo info;
|
TensorInfo info;
|
||||||
info.name = inputs_desc_[index].name;
|
info.name = inputs_desc_[index].name;
|
||||||
info.shape.assign(inputs_desc_[index].shape.begin(),
|
info.shape.assign(inputs_desc_[index].shape.begin(),
|
||||||
@@ -512,7 +575,8 @@ TensorInfo TrtBackend::GetInputInfo(int index) {
|
|||||||
|
|
||||||
TensorInfo TrtBackend::GetOutputInfo(int index) {
|
TensorInfo TrtBackend::GetOutputInfo(int index) {
|
||||||
FDASSERT(index < NumOutputs(),
|
FDASSERT(index < NumOutputs(),
|
||||||
"The index: %d should less than the number of outputs: %d.", index, NumOutputs());
|
"The index: %d should less than the number of outputs: %d.", index,
|
||||||
|
NumOutputs());
|
||||||
TensorInfo info;
|
TensorInfo info;
|
||||||
info.name = outputs_desc_[index].name;
|
info.name = outputs_desc_[index].name;
|
||||||
info.shape.assign(outputs_desc_[index].shape.begin(),
|
info.shape.assign(outputs_desc_[index].shape.begin(),
|
||||||
@@ -520,4 +584,4 @@ TensorInfo TrtBackend::GetOutputInfo(int index) {
|
|||||||
info.dtype = GetFDDataType(outputs_desc_[index].dtype);
|
info.dtype = GetFDDataType(outputs_desc_[index].dtype);
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
||||||
|
@@ -19,11 +19,11 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "NvInfer.h"
|
||||||
|
#include "NvOnnxParser.h"
|
||||||
#include "fastdeploy/backends/backend.h"
|
#include "fastdeploy/backends/backend.h"
|
||||||
#include "fastdeploy/backends/tensorrt/utils.h"
|
#include "fastdeploy/backends/tensorrt/utils.h"
|
||||||
#include <cuda_runtime_api.h>
|
#include <cuda_runtime_api.h>
|
||||||
#include "NvOnnxParser.h"
|
|
||||||
#include "NvInfer.h"
|
|
||||||
|
|
||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
|
|
||||||
@@ -56,7 +56,6 @@ FDDataType GetFDDataType(const nvinfer1::DataType& dtype);
|
|||||||
class TrtBackend : public BaseBackend {
|
class TrtBackend : public BaseBackend {
|
||||||
public:
|
public:
|
||||||
TrtBackend() : engine_(nullptr), context_(nullptr) {}
|
TrtBackend() : engine_(nullptr), context_(nullptr) {}
|
||||||
virtual ~TrtBackend() = default;
|
|
||||||
void BuildOption(const TrtBackendOption& option);
|
void BuildOption(const TrtBackendOption& option);
|
||||||
|
|
||||||
bool InitFromPaddle(const std::string& model_file,
|
bool InitFromPaddle(const std::string& model_file,
|
||||||
@@ -66,9 +65,6 @@ class TrtBackend : public BaseBackend {
|
|||||||
bool InitFromOnnx(const std::string& model_file,
|
bool InitFromOnnx(const std::string& model_file,
|
||||||
const TrtBackendOption& option = TrtBackendOption(),
|
const TrtBackendOption& option = TrtBackendOption(),
|
||||||
bool from_memory_buffer = false);
|
bool from_memory_buffer = false);
|
||||||
bool InitFromTrt(const std::string& trt_engine_file,
|
|
||||||
const TrtBackendOption& option = TrtBackendOption());
|
|
||||||
|
|
||||||
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
|
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs);
|
||||||
|
|
||||||
int NumInputs() const { return inputs_desc_.size(); }
|
int NumInputs() const { return inputs_desc_.size(); }
|
||||||
@@ -76,7 +72,14 @@ class TrtBackend : public BaseBackend {
|
|||||||
TensorInfo GetInputInfo(int index);
|
TensorInfo GetInputInfo(int index);
|
||||||
TensorInfo GetOutputInfo(int index);
|
TensorInfo GetOutputInfo(int index);
|
||||||
|
|
||||||
|
// Explicitly releases the ONNX parser when the backend is destroyed.
// reset() on an empty smart pointer does nothing, so no null check is needed.
~TrtBackend() { parser_.reset(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
TrtBackendOption option_;
|
||||||
std::shared_ptr<nvinfer1::ICudaEngine> engine_;
|
std::shared_ptr<nvinfer1::ICudaEngine> engine_;
|
||||||
std::shared_ptr<nvinfer1::IExecutionContext> context_;
|
std::shared_ptr<nvinfer1::IExecutionContext> context_;
|
||||||
FDUniquePtr<nvonnxparser::IParser> parser_;
|
FDUniquePtr<nvonnxparser::IParser> parser_;
|
||||||
@@ -96,11 +99,22 @@ class TrtBackend : public BaseBackend {
|
|||||||
// order, to help recover the right order
|
// order, to help recover the right order
|
||||||
std::map<std::string, int> outputs_order_;
|
std::map<std::string, int> outputs_order_;
|
||||||
|
|
||||||
|
// temporary store onnx model content
|
||||||
|
// once it has been used to build the trt engine
|
||||||
|
// it will be released
|
||||||
|
std::string onnx_model_buffer_;
|
||||||
|
// Stores shape information of the loaded model
|
||||||
|
// For dynamic shapes, the range information will be recorded
|
||||||
|
// Also will update the range information while inferencing
|
||||||
|
std::map<std::string, ShapeRangeInfo> shape_range_info_;
|
||||||
|
|
||||||
void GetInputOutputInfo();
|
void GetInputOutputInfo();
|
||||||
void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
|
void AllocateBufferInDynamicShape(const std::vector<FDTensor>& inputs,
|
||||||
std::vector<FDTensor>* outputs);
|
std::vector<FDTensor>* outputs);
|
||||||
bool CreateTrtEngine(const std::string& onnx_model,
|
bool CreateTrtEngineFromOnnx(const std::string& onnx_model_buffer);
|
||||||
const TrtBackendOption& option);
|
bool BuildTrtEngine();
|
||||||
|
bool LoadTrtCache(const std::string& trt_engine_file);
|
||||||
|
int ShapeRangeInfoUpdated(const std::vector<FDTensor>& inputs);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
||||||
|
138
csrc/fastdeploy/backends/tensorrt/utils.cc
Normal file
138
csrc/fastdeploy/backends/tensorrt/utils.cc
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "fastdeploy/backends/tensorrt/utils.h"
|
||||||
|
|
||||||
|
namespace fastdeploy {
|
||||||
|
|
||||||
|
int ShapeRangeInfo::Update(const std::vector<int64_t>& new_shape) {
|
||||||
|
if (new_shape.size() != shape.size()) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
int need_update_engine = 0;
|
||||||
|
for (size_t i = 0; i < shape.size(); ++i) {
|
||||||
|
if (is_static[i] == 1 && new_shape[i] != shape[i]) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (new_shape[i] < min[i] || min[i] < 0) {
|
||||||
|
need_update_engine = 1;
|
||||||
|
}
|
||||||
|
if (new_shape[i] > max[i] || max[i] < 0) {
|
||||||
|
need_update_engine = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (need_update_engine == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
FDWARNING << "[New Shape Out of Range] input name: " << name
|
||||||
|
<< ", shape: " << new_shape
|
||||||
|
<< ", The shape range before: min_shape=" << min
|
||||||
|
<< ", max_shape=" << max << "." << std::endl;
|
||||||
|
for (size_t i = 0; i < shape.size(); ++i) {
|
||||||
|
if (new_shape[i] < min[i] || min[i] < 0) {
|
||||||
|
min[i] = new_shape[i];
|
||||||
|
}
|
||||||
|
if (new_shape[i] > max[i] || max[i] < 0) {
|
||||||
|
max[i] = new_shape[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FDWARNING
|
||||||
|
<< "[New Shape Out of Range] The updated shape range now: min_shape="
|
||||||
|
<< min << ", max_shape=" << max << "." << std::endl;
|
||||||
|
return need_update_engine;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the size in bytes of a single element of the given TensorRT data
// type. Any type not listed explicitly is sized like bool.
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype) {
  switch (dtype) {
    case nvinfer1::DataType::kFLOAT:
      return sizeof(float);
    case nvinfer1::DataType::kHALF:
      // Half precision takes half the storage of a float.
      return sizeof(float) / 2;
    case nvinfer1::DataType::kINT8:
      return sizeof(int8_t);
    case nvinfer1::DataType::kINT32:
      return sizeof(int32_t);
    default:
      // kBOOL
      return sizeof(bool);
  }
}
|
||||||
|
|
||||||
|
// Maps a TensorRT data type to the FDDataType value used by this function's
// callers. Any type not listed explicitly maps to BOOL.
FDDataType GetFDDataType(const nvinfer1::DataType& dtype) {
  switch (dtype) {
    case nvinfer1::DataType::kFLOAT:
      return FDDataType::FP32;
    case nvinfer1::DataType::kHALF:
      return FDDataType::FP16;
    case nvinfer1::DataType::kINT8:
      return FDDataType::INT8;
    case nvinfer1::DataType::kINT32:
      return FDDataType::INT32;
    default:
      // kBOOL
      return FDDataType::BOOL;
  }
}
|
||||||
|
|
||||||
|
// Converts the integer dtype code of a model reader to a TensorRT data type.
//
// Codes 0, 3, 4 and 5 map to kFLOAT, kINT8, kINT32 and kINT32 respectively
// (code 5 is int64, which is treated as int32). Codes 1 (double) and 2
// (uint8) are not supported and trigger an assertion, as does any other code.
nvinfer1::DataType ReaderDtypeToTrtDtype(int reader_dtype) {
  switch (reader_dtype) {
    case 0: {
      return nvinfer1::DataType::kFLOAT;
    }
    case 1: {
      FDASSERT(false, "TensorRT cannot support data type of double now.");
      break;
    }
    case 2: {
      FDASSERT(false, "TensorRT cannot support data type of uint8 now.");
      break;
    }
    case 3: {
      return nvinfer1::DataType::kINT8;
    }
    case 4: {
      return nvinfer1::DataType::kINT32;
    }
    case 5: {
      // regard int64 as int32
      return nvinfer1::DataType::kINT32;
    }
    default: {
      break;
    }
  }
  // Reached for every code that did not return above.
  FDASSERT(false, "Received unexpected data type of %d", reader_dtype);
  return nvinfer1::DataType::kFLOAT;
}
|
||||||
|
|
||||||
|
// Copies the first `nbDims` extents of a TensorRT Dims into a std::vector.
std::vector<int> ToVec(const nvinfer1::Dims& dim) {
  const auto* first = dim.d;
  const auto* last = first + dim.nbDims;
  return std::vector<int>(first, last);
}
|
||||||
|
|
||||||
|
// Returns the number of elements described by `d`, i.e. the product of its
// first `nbDims` extents (1 when nbDims is 0).
//
// The accumulator is seeded with an int64_t on purpose: std::accumulate keeps
// its running value in the type of the initial value, so a plain `1` would
// truncate every partial product to int regardless of the int64_t functor.
int64_t Volume(const nvinfer1::Dims& d) {
  return std::accumulate(d.d, d.d + d.nbDims, int64_t{1},
                         std::multiplies<int64_t>());
}
|
||||||
|
|
||||||
|
// Builds a TensorRT Dims from a vector of extents. Only the first
// nvinfer1::Dims::MAX_DIMS entries are kept; a warning is logged when the
// vector is longer than that. Unused slots are zero-initialized.
nvinfer1::Dims ToDims(const std::vector<int>& vec) {
  const int capacity = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
  const int requested = static_cast<int>(vec.size());
  if (requested > capacity) {
    FDWARNING << "Vector too long, only first 8 elements are used in dimension."
              << std::endl;
  }
  // Pick first nvinfer1::Dims::MAX_DIMS elements
  const int kept = std::min(requested, capacity);
  nvinfer1::Dims dims{kept, {}};
  std::copy_n(vec.begin(), kept, std::begin(dims.d));
  return dims;
}
|
||||||
|
|
||||||
|
// Builds a TensorRT Dims from a vector of 64-bit extents. Only the first
// nvinfer1::Dims::MAX_DIMS entries are kept; a warning is logged when the
// vector is longer than that. Each extent is narrowed to the element type of
// Dims::d, and unused slots are zero-initialized.
nvinfer1::Dims ToDims(const std::vector<int64_t>& vec) {
  const int capacity = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
  const int requested = static_cast<int>(vec.size());
  if (requested > capacity) {
    FDWARNING << "Vector too long, only first 8 elements are used in dimension."
              << std::endl;
  }
  // Pick first nvinfer1::Dims::MAX_DIMS elements
  const int kept = std::min(requested, capacity);
  nvinfer1::Dims dims{kept, {}};
  for (int i = 0; i < kept; ++i) {
    dims.d[i] = static_cast<decltype(dims.d[0] + 0)>(vec[i]);
  }
  return dims;
}
|
||||||
|
|
||||||
|
} // namespace fastdeploy
|
@@ -14,53 +14,54 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <iostream>
|
#include "NvInfer.h"
|
||||||
#include <map>
|
#include "fastdeploy/core/fd_tensor.h"
|
||||||
#include <string>
|
#include "fastdeploy/utils/utils.h"
|
||||||
#include <vector>
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <cuda_runtime_api.h>
|
#include <cuda_runtime_api.h>
|
||||||
#include "NvInfer.h"
|
#include <iostream>
|
||||||
#include "fastdeploy/utils/utils.h"
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <numeric>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
|
|
||||||
struct FDInferDeleter {
|
struct FDInferDeleter {
|
||||||
template<typename T> void operator()(T* obj) const {
|
template <typename T> void operator()(T* obj) const {
|
||||||
delete obj;
|
if (obj) {
|
||||||
|
obj->destroy();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T> using FDUniquePtr = std::unique_ptr<T, FDInferDeleter>;
|
template <typename T> using FDUniquePtr = std::unique_ptr<T, FDInferDeleter>;
|
||||||
|
|
||||||
inline uint32_t GetElementSize(nvinfer1::DataType t) noexcept {
|
int64_t Volume(const nvinfer1::Dims& d);
|
||||||
switch (t) {
|
|
||||||
case nvinfer1::DataType::kINT32:
|
nvinfer1::Dims ToDims(const std::vector<int>& vec);
|
||||||
return 4;
|
nvinfer1::Dims ToDims(const std::vector<int64_t>& vec);
|
||||||
case nvinfer1::DataType::kFLOAT:
|
|
||||||
return 4;
|
size_t TrtDataTypeSize(const nvinfer1::DataType& dtype);
|
||||||
case nvinfer1::DataType::kHALF:
|
|
||||||
return 2;
|
FDDataType GetFDDataType(const nvinfer1::DataType& dtype);
|
||||||
case nvinfer1::DataType::kBOOL:
|
|
||||||
case nvinfer1::DataType::kINT8:
|
nvinfer1::DataType ReaderDtypeToTrtDtype(int reader_dtype);
|
||||||
return 1;
|
|
||||||
|
std::vector<int> ToVec(const nvinfer1::Dims& dim);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
std::ostream& operator<<(std::ostream& out, const std::vector<T>& vec) {
|
||||||
|
out << "[";
|
||||||
|
for (size_t i = 0; i < vec.size(); ++i) {
|
||||||
|
if (i != vec.size() - 1) {
|
||||||
|
out << vec[i] << ", ";
|
||||||
|
} else {
|
||||||
|
out << vec[i] << "]";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return out;
|
||||||
}
|
|
||||||
|
|
||||||
inline int64_t Volume(const nvinfer1::Dims& d) {
|
|
||||||
return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies<int64_t>());
|
|
||||||
}
|
|
||||||
|
|
||||||
inline nvinfer1::Dims ToDims(const std::vector<int>& vec) {
|
|
||||||
int limit = static_cast<int>(nvinfer1::Dims::MAX_DIMS);
|
|
||||||
if (static_cast<int>(vec.size()) > limit) {
|
|
||||||
FDWARNING << "Vector too long, only first 8 elements are used in dimension." << std::endl;
|
|
||||||
}
|
|
||||||
// Pick first nvinfer1::Dims::MAX_DIMS elements
|
|
||||||
nvinfer1::Dims dims{std::min(static_cast<int>(vec.size()), limit), {}};
|
|
||||||
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
|
|
||||||
return dims;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename AllocFunc, typename FreeFunc> class FDGenericBuffer {
|
template <typename AllocFunc, typename FreeFunc> class FDGenericBuffer {
|
||||||
@@ -123,9 +124,7 @@ template <typename AllocFunc, typename FreeFunc> class FDGenericBuffer {
|
|||||||
//!
|
//!
|
||||||
//! \brief Returns the size (in bytes) of the buffer.
|
//! \brief Returns the size (in bytes) of the buffer.
|
||||||
//!
|
//!
|
||||||
size_t nbBytes() const {
|
size_t nbBytes() const { return this->size() * TrtDataTypeSize(mType); }
|
||||||
return this->size() * GetElementSize(mType);
|
|
||||||
}
|
|
||||||
|
|
||||||
//!
|
//!
|
||||||
//! \brief Resizes the buffer. This is a no-op if the new size is smaller than
|
//! \brief Resizes the buffer. This is a no-op if the new size is smaller than
|
||||||
@@ -145,9 +144,7 @@ template <typename AllocFunc, typename FreeFunc> class FDGenericBuffer {
|
|||||||
//!
|
//!
|
||||||
//! \brief Overload of resize that accepts Dims
|
//! \brief Overload of resize that accepts Dims
|
||||||
//!
|
//!
|
||||||
void resize(const nvinfer1::Dims& dims) {
|
void resize(const nvinfer1::Dims& dims) { return this->resize(Volume(dims)); }
|
||||||
return this->resize(Volume(dims));
|
|
||||||
}
|
|
||||||
|
|
||||||
~FDGenericBuffer() { freeFn(mBuffer); }
|
~FDGenericBuffer() { freeFn(mBuffer); }
|
||||||
|
|
||||||
@@ -183,11 +180,14 @@ class FDTrtLogger : public nvinfer1::ILogger {
|
|||||||
logger = new FDTrtLogger();
|
logger = new FDTrtLogger();
|
||||||
return logger;
|
return logger;
|
||||||
}
|
}
|
||||||
void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override {
|
void log(nvinfer1::ILogger::Severity severity,
|
||||||
|
const char* msg) noexcept override {
|
||||||
if (severity == nvinfer1::ILogger::Severity::kINFO) {
|
if (severity == nvinfer1::ILogger::Severity::kINFO) {
|
||||||
FDINFO << msg << std::endl;
|
// Disable this log
|
||||||
|
// FDINFO << msg << std::endl;
|
||||||
} else if (severity == nvinfer1::ILogger::Severity::kWARNING) {
|
} else if (severity == nvinfer1::ILogger::Severity::kWARNING) {
|
||||||
FDWARNING << msg << std::endl;
|
// Disable this log
|
||||||
|
// FDWARNING << msg << std::endl;
|
||||||
} else if (severity == nvinfer1::ILogger::Severity::kERROR) {
|
} else if (severity == nvinfer1::ILogger::Severity::kERROR) {
|
||||||
FDERROR << msg << std::endl;
|
FDERROR << msg << std::endl;
|
||||||
} else if (severity == nvinfer1::ILogger::Severity::kINTERNAL_ERROR) {
|
} else if (severity == nvinfer1::ILogger::Severity::kINTERNAL_ERROR) {
|
||||||
@@ -196,4 +196,47 @@ class FDTrtLogger : public nvinfer1::ILogger {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace fastdeploy
|
struct ShapeRangeInfo {
|
||||||
|
ShapeRangeInfo(const std::vector<int64_t>& new_shape) {
|
||||||
|
shape.assign(new_shape.begin(), new_shape.end());
|
||||||
|
min.resize(new_shape.size());
|
||||||
|
max.resize(new_shape.size());
|
||||||
|
is_static.resize(new_shape.size());
|
||||||
|
for (size_t i = 0; i < new_shape.size(); ++i) {
|
||||||
|
if (new_shape[i] > 0) {
|
||||||
|
min[i] = new_shape[i];
|
||||||
|
max[i] = new_shape[i];
|
||||||
|
is_static[i] = 1;
|
||||||
|
} else {
|
||||||
|
min[i] = -1;
|
||||||
|
max[i] = -1;
|
||||||
|
is_static[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string name;
|
||||||
|
std::vector<int64_t> shape;
|
||||||
|
std::vector<int64_t> min;
|
||||||
|
std::vector<int64_t> max;
|
||||||
|
std::vector<int64_t> opt;
|
||||||
|
std::vector<int8_t> is_static;
|
||||||
|
// return
|
||||||
|
// -1: new shape is inillegal
|
||||||
|
// 0 : new shape is able to inference
|
||||||
|
// 1 : new shape is out of range, need to update engine
|
||||||
|
int Update(const std::vector<int64_t>& new_shape);
|
||||||
|
int Update(const std::vector<int>& new_shape) {
|
||||||
|
std::vector<int64_t> new_shape_int64(new_shape.begin(), new_shape.end());
|
||||||
|
return Update(new_shape_int64);
|
||||||
|
}
|
||||||
|
|
||||||
|
friend std::ostream& operator<<(std::ostream& out,
|
||||||
|
const ShapeRangeInfo& info) {
|
||||||
|
out << "Input name: " << info.name << ", shape=" << info.shape
|
||||||
|
<< ", min=" << info.min << ", max=" << info.max << std::endl;
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace fastdeploy
|
||||||
|
3
external/eigen.cmake
vendored
3
external/eigen.cmake
vendored
@@ -17,7 +17,8 @@ include(ExternalProject)
|
|||||||
# update eigen to the commit id f612df27 on 03/16/2021
|
# update eigen to the commit id f612df27 on 03/16/2021
|
||||||
set(EIGEN_PREFIX_DIR ${THIRD_PARTY_PATH}/eigen3)
|
set(EIGEN_PREFIX_DIR ${THIRD_PARTY_PATH}/eigen3)
|
||||||
set(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3/src/extern_eigen3)
|
set(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3/src/extern_eigen3)
|
||||||
set(EIGEN_REPOSITORY https://gitlab.com/libeigen/eigen.git)
|
#set(EIGEN_REPOSITORY https://gitlab.com/libeigen/eigen.git)
|
||||||
|
set(EIGEN_REPOSITORY https://gitee.com/jiangjiajun/eigen.git)
|
||||||
set(EIGEN_TAG f612df273689a19d25b45ca4f8269463207c4fee)
|
set(EIGEN_TAG f612df273689a19d25b45ca4f8269463207c4fee)
|
||||||
|
|
||||||
if(WIN32)
|
if(WIN32)
|
||||||
|
4
external/paddle2onnx.cmake
vendored
4
external/paddle2onnx.cmake
vendored
@@ -42,8 +42,8 @@ else()
|
|||||||
CACHE FILEPATH "paddle2onnx compile library." FORCE)
|
CACHE FILEPATH "paddle2onnx compile library." FORCE)
|
||||||
endif(WIN32)
|
endif(WIN32)
|
||||||
|
|
||||||
set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/paddle2onnx/libs/")
|
set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
|
||||||
set(PADDLE2ONNX_VERSION "1.0.0rc3")
|
set(PADDLE2ONNX_VERSION "1.0.1")
|
||||||
if(WIN32)
|
if(WIN32)
|
||||||
set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
|
set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
|
||||||
if(NOT CMAKE_CL_64)
|
if(NOT CMAKE_CL_64)
|
||||||
|
Reference in New Issue
Block a user