Mirror of https://github.com/PaddlePaddle/FastDeploy.git
Fix bug when getting input/output information from PaddleBackend (#339)

* Fix bug when getting input/output information from PaddleBackend
* Support Paddle Inference with TensorRT (#340)
* Fix bug
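For orientation, here is a minimal sketch of how the code paths touched by this commit are exercised. The option fields (use_gpu, enable_trt, trt_option.enable_fp16) and the InitFromPaddle entry point appear in the diff below; the header path, direct construction of PaddleBackend, and the exact InitFromPaddle parameter order are assumptions for illustration, not part of this commit.

// Sketch only: drives the paths changed by #339/#340.
// Header path and InitFromPaddle parameter order are assumed.
#include <string>
#include "fastdeploy/backends/paddle/paddle_backend.h"  // assumed path

int main() {
  fastdeploy::PaddleBackendOption option;
  option.use_gpu = true;                 // take the EnableUseGpu branch
  option.enable_trt = true;              // exercise the new Paddle-TRT path (#340)
  option.trt_option.enable_fp16 = true;  // request kHalf precision

  fastdeploy::PaddleBackend backend;
  // After this call the input/output descriptions come from
  // paddle2onnx::PaddleReader instead of the predictor (#339).
  if (!backend.InitFromPaddle("model.pdmodel", "model.pdiparams", option)) {
    return -1;
  }
  return 0;
}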
@@ -16,23 +16,43 @@
 
 namespace fastdeploy {
 
-void PaddleBackend::BuildOption(const PaddleBackendOption& option,
-                                const std::string& model_file) {
+void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
   if (option.use_gpu) {
     config_.EnableUseGpu(option.gpu_mem_init_size, option.gpu_id);
+    if (option.enable_trt) {
+#ifdef ENABLE_TRT_BACKEND
+      auto precision = paddle_infer::PrecisionType::kFloat32;
+      if (option.trt_option.enable_fp16) {
+        precision = paddle_infer::PrecisionType::kHalf;
+      }
+      bool use_static = false;
+      if (option.trt_option.serialize_file != "") {
+        FDWARNING << "Detected that the TensorRT cache file has been set to "
+                  << option.trt_option.serialize_file
+                  << "; note that while Paddle-TRT is enabled, the cache file "
+                     "will be saved to the directory of the Paddle model."
+                  << std::endl;
+        use_static = true;
+      }
+      config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, 32, 3,
+                                   precision, use_static);
+      std::map<std::string, std::vector<int>> max_shape;
+      std::map<std::string, std::vector<int>> min_shape;
+      std::map<std::string, std::vector<int>> opt_shape;
+      for (const auto& item : option.trt_option.min_shape) {
+        auto max_iter = option.trt_option.max_shape.find(item.first);
+        auto opt_iter = option.trt_option.opt_shape.find(item.first);
+        FDASSERT(max_iter != option.trt_option.max_shape.end(),
+                 "Cannot find %s in TrtBackendOption::max_shape.",
+                 item.first.c_str());
+        FDASSERT(opt_iter != option.trt_option.opt_shape.end(),
+                 "Cannot find %s in TrtBackendOption::opt_shape.",
+                 item.first.c_str());
+        max_shape[item.first].assign(max_iter->second.begin(),
+                                     max_iter->second.end());
+        opt_shape[item.first].assign(opt_iter->second.begin(),
+                                     opt_iter->second.end());
+        min_shape[item.first].assign(item.second.begin(), item.second.end());
+      }
+      if (min_shape.size() > 0) {
+        config_.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
+      }
+#else
+      FDWARNING << "FastDeploy is not compiled with the TensorRT backend, so "
+                   "it will fall back to GPU inference with the Paddle "
+                   "Inference backend."
+                << std::endl;
+#endif
+    }
   } else {
     config_.DisableGpu();
     if (option.enable_mkldnn) {
       config_.EnableMKLDNN();
-      std::string contents;
-      if (!ReadBinaryFromFile(model_file, &contents)) {
-        return;
-      }
-      auto reader =
-          paddle2onnx::PaddleReader(contents.c_str(), contents.size());
-      if (reader.is_quantize_model) {
-        config_.EnableMkldnnInt8();
-      }
       config_.SetMkldnnCacheCapacity(option.mkldnn_cache_size);
     }
   }
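The shape-map loop in the hunk above requires that every input named in trt_option.min_shape also appears in max_shape and opt_shape, otherwise the FDASSERTs fire. A hedged sketch of filling these maps before BuildOption runs; the input name "x" and the concrete shapes are placeholders:

// Sketch: populate the dynamic-shape maps that BuildOption copies into
// SetTRTDynamicShapeInfo. Input name "x" and the shapes are placeholders.
fastdeploy::PaddleBackendOption option;
option.use_gpu = true;
option.enable_trt = true;
option.trt_option.min_shape["x"] = {1, 3, 224, 224};  // smallest profile
option.trt_option.opt_shape["x"] = {4, 3, 224, 224};  // typical profile
option.trt_option.max_shape["x"] = {8, 3, 224, 224};  // largest profile
// Leaving "x" out of max_shape or opt_shape would trip the FDASSERTs above.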
@@ -62,28 +82,48 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
     return false;
   }
   config_.SetModel(model_file, params_file);
-  BuildOption(option, model_file);
+  BuildOption(option);
+
+  // The input/output information obtained from the predictor is not correct,
+  // so use PaddleReader instead for now.
+  std::string contents;
+  if (!ReadBinaryFromFile(model_file, &contents)) {
+    return false;
+  }
+  auto reader =
+      paddle2onnx::PaddleReader(contents.c_str(), contents.size());
+
+  // If it is a quantized model and runs on CPU with MKLDNN, automatically
+  // switch to int8 mode.
+  if (reader.is_quantize_model) {
+    if (option.use_gpu) {
+      FDWARNING << "The loaded model is a quantized model; for inference on "
+                   "GPU, please use the TensorRT backend to get better "
+                   "performance."
+                << std::endl;
+    }
+    if (option.enable_mkldnn) {
+      config_.EnableMkldnnInt8();
+    } else {
+      FDWARNING << "The loaded model is a quantized model; for inference on "
+                   "CPU, please enable MKLDNN to get better performance."
+                << std::endl;
+    }
+  }
+
+  inputs_desc_.resize(reader.num_inputs);
+  for (int i = 0; i < reader.num_inputs; ++i) {
+    std::string name(reader.inputs[i].name);
+    std::vector<int64_t> shape(reader.inputs[i].shape,
+                               reader.inputs[i].shape + reader.inputs[i].rank);
+    inputs_desc_[i].name = name;
+    inputs_desc_[i].shape.assign(shape.begin(), shape.end());
+    inputs_desc_[i].dtype = ReaderDataTypeToFD(reader.inputs[i].dtype);
+  }
+  outputs_desc_.resize(reader.num_outputs);
+  for (int i = 0; i < reader.num_outputs; ++i) {
+    std::string name(reader.outputs[i].name);
+    std::vector<int64_t> shape(reader.outputs[i].shape,
+                               reader.outputs[i].shape + reader.outputs[i].rank);
+    outputs_desc_[i].name = name;
+    outputs_desc_[i].shape.assign(shape.begin(), shape.end());
+    outputs_desc_[i].dtype = ReaderDataTypeToFD(reader.outputs[i].dtype);
+  }
+
   predictor_ = paddle_infer::CreatePredictor(config_);
-  std::vector<std::string> input_names = predictor_->GetInputNames();
-  std::vector<std::string> output_names = predictor_->GetOutputNames();
-  for (size_t i = 0; i < input_names.size(); ++i) {
-    auto handle = predictor_->GetInputHandle(input_names[i]);
-    TensorInfo info;
-    auto shape = handle->shape();
-    info.shape.assign(shape.begin(), shape.end());
-    info.dtype = PaddleDataTypeToFD(handle->type());
-    info.name = input_names[i];
-    inputs_desc_.emplace_back(info);
-  }
-  for (size_t i = 0; i < output_names.size(); ++i) {
-    auto handle = predictor_->GetOutputHandle(output_names[i]);
-    TensorInfo info;
-    auto shape = handle->shape();
-    info.shape.assign(shape.begin(), shape.end());
-    info.dtype = PaddleDataTypeToFD(handle->type());
-    info.name = output_names[i];
-    outputs_desc_.emplace_back(info);
-  }
   initialized_ = true;
   return true;
 }
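Because the descriptions are now fixed at init time, they can be inspected without querying the predictor. A small sketch that walks the arrays filled above; it assumes it runs with access to inputs_desc_ (for example, inside a PaddleBackend member function) and that <iostream> is included:

// Sketch: dump the input descriptions gathered from PaddleReader above.
// TensorInfo's name/shape fields appear in the diff; the access context
// is assumed.
for (const auto& info : inputs_desc_) {
  std::cout << "input " << info.name << " shape=[";
  for (size_t j = 0; j < info.shape.size(); ++j) {
    std::cout << info.shape[j] << (j + 1 < info.shape.size() ? "," : "");
  }
  std::cout << "]" << std::endl;
}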
@@ -131,4 +171,4 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
-}  // namespace fastdeploy
+}  // namespace fastdeploy
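The same reader the fix relies on can also be driven standalone to inspect a serialized program. A sketch using only calls that appear in the diff (ReadBinaryFromFile, paddle2onnx::PaddleReader and its is_quantize_model/num_inputs/inputs[i] fields); the helper name and the model path are placeholders:

// Sketch: inspect a Paddle model with the reader this fix relies on.
// Assumes <iostream> and <string> plus FastDeploy's utility headers.
bool DumpModelInfo(const std::string& model_file) {
  std::string contents;
  if (!ReadBinaryFromFile(model_file, &contents)) {
    return false;
  }
  auto reader = paddle2onnx::PaddleReader(contents.c_str(), contents.size());
  std::cout << "quantized model: " << reader.is_quantize_model << std::endl;
  for (int i = 0; i < reader.num_inputs; ++i) {
    std::cout << "input " << reader.inputs[i].name
              << " rank=" << reader.inputs[i].rank << std::endl;
  }
  return true;
}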