Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 16:48:03 +08:00
[Other] Optimize runtime module (#1356)
* Optimize runtime
* fix error
* [Backend] Add option to print tensorrt conversion log (#1386)

Co-authored-by: root <root@bjyz-sys-gpu-kongming3.bjyz.baidu.com>
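For context on the new TensorRT logging switch, here is a minimal usage sketch. It is not part of the commit: it assumes the standard FastDeploy C++ API (RuntimeOption::UseGpu, UseTrtBackend, SetModelPath and Runtime::Init) and an ONNX model served through the TensorRT backend; only the enable_log_info line exercises the code added in this diff.

    // Minimal sketch: turn on the TensorRT conversion log added in this commit.
    #include "fastdeploy/runtime.h"

    int main() {
      fastdeploy::RuntimeOption option;
      option.UseGpu(0);
      option.UseTrtBackend();
      option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);

      // New flag: forward TensorRT's INFO/WARNING messages while the ONNX
      // graph is parsed and the engine is built.
      option.trt_option.enable_log_info = true;

      fastdeploy::Runtime runtime;
      if (!runtime.Init(option)) {
        return -1;
      }
      return 0;
    }

The pybind change below exposes the same flag on TrtBackendOption in the Python bindings.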
@@ -98,6 +98,33 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
 }
 }
 
+bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
+  if (!(Supported(runtime_option.model_format, Backend::PDINFER) && Supported(runtime_option.device, Backend::PDINFER))) {
+    return false;
+  }
+
+  auto option = runtime_option;
+  option.paddle_infer_option.model_file = runtime_option.model_file;
+  option.paddle_infer_option.params_file = runtime_option.params_file;
+  option.paddle_infer_option.model_from_memory_ = runtime_option.model_from_memory_;
+  option.paddle_infer_option.device = runtime_option.device;
+  option.paddle_infer_option.device_id = runtime_option.device_id;
+  option.paddle_infer_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
+  option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
+  option.paddle_infer_option.trt_option = runtime_option.trt_option;
+  option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
+  if (option.model_from_memory_) {
+    return InitFromPaddle(option.model_file, option.params_file, option.paddle_infer_option);
+  } else {
+    std::string model_buffer = "";
+    std::string params_buffer = "";
+    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), "Failed to read model file from %s.", option.model_file.c_str());
+    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), "Failed to read parameters file from %s.", option.params_file.c_str());
+    return InitFromPaddle(model_buffer, params_buffer, option.paddle_infer_option);
+  }
+  return false;
+}
+
 bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
                                    const std::string& params_buffer,
                                    const PaddleBackendOption& option) {
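The new PaddleBackend::Init mirrors the simplification of Runtime::CreatePaddleBackend later in this diff: option plumbing moves out of the runtime layer and into the backend. A condensed, hypothetical caller-side sketch (names taken from the runtime hunk below):

    // Hypothetical caller: hand the whole RuntimeOption to the backend and let
    // it populate paddle_infer_option itself.
    std::unique_ptr<BaseBackend> backend = utils::make_unique<PaddleBackend>();
    FDASSERT(backend->Init(runtime_option),
             "Failed to initialized Paddle Inference backend.");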
@@ -54,12 +54,7 @@ class PaddleBackend : public BaseBackend {
  public:
   PaddleBackend() {}
   virtual ~PaddleBackend() = default;
-  void BuildOption(const PaddleBackendOption& option);
-
-  bool InitFromPaddle(const std::string& model_buffer,
-                      const std::string& params_buffer,
-                      const PaddleBackendOption& option = PaddleBackendOption());
-
+  bool Init(const RuntimeOption& option);
 
   bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
              bool copy_to_fd = true) override;
@@ -77,6 +72,12 @@ class PaddleBackend : public BaseBackend {
   std::vector<TensorInfo> GetOutputInfos() override;
 
  private:
+  void BuildOption(const PaddleBackendOption& option);
+
+  bool InitFromPaddle(const std::string& model_buffer,
+                      const std::string& params_buffer,
+                      const PaddleBackendOption& option = PaddleBackendOption());
+
   void
   CollectShapeRun(paddle_infer::Predictor* predictor,
                   const std::map<std::string, std::vector<int>>& shape) const;
@@ -30,6 +30,9 @@ struct TrtBackendOption {
   /// `max_workspace_size` for TensorRT
   size_t max_workspace_size = 1 << 30;
 
+  /// Enable log while converting onnx model to tensorrt
+  bool enable_log_info = false;
+
   /*
    * @brief Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
    */
@@ -21,6 +21,7 @@ void BindTrtOption(pybind11::module& m) {
   pybind11::class_<TrtBackendOption>(m, "TrtBackendOption")
       .def(pybind11::init())
       .def_readwrite("enable_fp16", &TrtBackendOption::enable_fp16)
+      .def_readwrite("enable_log_info", &TrtBackendOption::enable_log_info)
       .def_readwrite("max_batch_size", &TrtBackendOption::max_batch_size)
       .def_readwrite("max_workspace_size",
                      &TrtBackendOption::max_workspace_size)
@@ -114,6 +114,13 @@ bool TrtBackend::LoadTrtCache(const std::string& trt_engine_file) {
 }
 
 bool TrtBackend::Init(const RuntimeOption& runtime_option) {
+  auto trt_option = runtime_option.trt_option;
+  trt_option.model_file = runtime_option.model_file;
+  trt_option.params_file = runtime_option.params_file;
+  trt_option.model_format = runtime_option.model_format;
+  trt_option.gpu_id = runtime_option.device_id;
+  trt_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
+  trt_option.external_stream_ = runtime_option.external_stream_;
   if (runtime_option.device != Device::GPU) {
     FDERROR << "TrtBackend only supports Device::GPU, but now it's "
             << runtime_option.device << "." << std::endl;
@@ -130,7 +137,7 @@ bool TrtBackend::Init(const RuntimeOption& runtime_option) {
     if (runtime_option.model_from_memory_) {
       return InitFromPaddle(runtime_option.model_file,
                             runtime_option.params_file,
-                            runtime_option.trt_option);
+                            trt_option);
     } else {
       std::string model_buffer;
       std::string params_buffer;
@@ -141,17 +148,17 @@ bool TrtBackend::Init(const RuntimeOption& runtime_option) {
                "Failed to read parameters file %s.",
                runtime_option.params_file.c_str());
       return InitFromPaddle(model_buffer, params_buffer,
-                            runtime_option.trt_option);
+                            trt_option);
     }
   } else {
     if (runtime_option.model_from_memory_) {
-      return InitFromOnnx(runtime_option.model_file, runtime_option.trt_option);
+      return InitFromOnnx(runtime_option.model_file, trt_option);
     } else {
       std::string model_buffer;
       FDASSERT(ReadBinaryFromFile(runtime_option.model_file, &model_buffer),
                "Failed to read model file %s.",
                runtime_option.model_file.c_str());
-      return InitFromOnnx(model_buffer, runtime_option.trt_option);
+      return InitFromOnnx(model_buffer, trt_option);
     }
   }
   return true;
@@ -525,6 +532,9 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs,
 }
 
 bool TrtBackend::BuildTrtEngine() {
+  if (option_.enable_log_info) {
+    FDTrtLogger::Get()->SetLog(true, true);
+  }
   auto config =
       FDUniquePtr<nvinfer1::IBuilderConfig>(builder_->createBuilderConfig());
   if (!config) {
@@ -220,20 +220,30 @@ class FDTrtLogger : public nvinfer1::ILogger {
     logger = new FDTrtLogger();
     return logger;
   }
+  void SetLog(bool enable_info = false, bool enable_warning = false) {
+    enable_info_ = enable_info;
+    enable_warning_ = enable_warning;
+  }
+
   void log(nvinfer1::ILogger::Severity severity,
            const char* msg) noexcept override {
     if (severity == nvinfer1::ILogger::Severity::kINFO) {
-      // Disable this log
-      // FDINFO << msg << std::endl;
+      if (enable_info_) {
+        FDINFO << msg << std::endl;
+      }
     } else if (severity == nvinfer1::ILogger::Severity::kWARNING) {
-      // Disable this log
-      // FDWARNING << msg << std::endl;
+      if (enable_warning_) {
+        FDWARNING << msg << std::endl;
+      }
     } else if (severity == nvinfer1::ILogger::Severity::kERROR) {
       FDERROR << msg << std::endl;
     } else if (severity == nvinfer1::ILogger::Severity::kINTERNAL_ERROR) {
       FDASSERT(false, "%s", msg);
     }
   }
+
+ private:
+  bool enable_info_ = false;
+  bool enable_warning_ = false;
 };
 
 struct ShapeRangeInfo {
@@ -154,19 +154,10 @@ bool Runtime::Init(const RuntimeOption& _option) {
   } else if (option.backend == Backend::SOPHGOTPU) {
     CreateSophgoNPUBackend();
   } else if (option.backend == Backend::POROS) {
-    FDASSERT(option.device == Device::CPU || option.device == Device::GPU,
-             "Backend::POROS only supports Device::CPU/Device::GPU.");
-    FDASSERT(option.model_format == ModelFormat::TORCHSCRIPT,
-             "Backend::POROS only supports model format of "
-             "ModelFormat::TORCHSCRIPT.");
-    FDINFO << "Runtime initialized with Backend::POROS in " << option.device
-           << "." << std::endl;
-    return true;
+    CreatePorosBackend();
   } else {
-    FDERROR << "Runtime only support "
-               "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
-               "backend now."
-            << std::endl;
+    std::string msg = Str(GetAvailableBackends());
+    FDERROR << "The compiled FastDeploy only supports " << msg << ", " << option.backend << " is not supported now." << std::endl;
     return false;
   }
   backend_->benchmark_option_ = option.benchmark_option;
@@ -264,43 +255,9 @@ void Runtime::ReleaseModelMemoryBuffer() {
 }
 
 void Runtime::CreatePaddleBackend() {
-  FDASSERT(
-      option.device == Device::CPU || option.device == Device::GPU ||
-          option.device == Device::IPU,
-      "Backend::PDINFER only supports Device::CPU/Device::GPU/Device::IPU.");
-  FDASSERT(
-      option.model_format == ModelFormat::PADDLE,
-      "Backend::PDINFER only supports model format of ModelFormat::PADDLE.");
 #ifdef ENABLE_PADDLE_BACKEND
-  option.paddle_infer_option.model_file = option.model_file;
-  option.paddle_infer_option.params_file = option.params_file;
-  option.paddle_infer_option.model_from_memory_ = option.model_from_memory_;
-  option.paddle_infer_option.device = option.device;
-  option.paddle_infer_option.device_id = option.device_id;
-  option.paddle_infer_option.enable_pinned_memory = option.enable_pinned_memory;
-  option.paddle_infer_option.external_stream_ = option.external_stream_;
-  option.paddle_infer_option.trt_option = option.trt_option;
-  option.paddle_infer_option.trt_option.gpu_id = option.device_id;
   backend_ = utils::make_unique<PaddleBackend>();
-  auto casted_backend = dynamic_cast<PaddleBackend*>(backend_.get());
-
-  if (option.model_from_memory_) {
-    FDASSERT(
-        casted_backend->InitFromPaddle(option.model_file, option.params_file,
-                                       option.paddle_infer_option),
-        "Load model from Paddle failed while initliazing PaddleBackend.");
-    ReleaseModelMemoryBuffer();
-  } else {
-    std::string model_buffer = "";
-    std::string params_buffer = "";
-    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
-             "Fail to read binary from model file");
-    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
-             "Fail to read binary from parameter file");
-    FDASSERT(casted_backend->InitFromPaddle(model_buffer, params_buffer,
-                                            option.paddle_infer_option),
-             "Load model from Paddle failed while initliazing PaddleBackend.");
-  }
+  FDASSERT(backend_->Init(option), "Failed to initialized Paddle Inference backend.");
 #else
   FDASSERT(false,
            "PaddleBackend is not available, please compiled with "
@@ -339,12 +296,6 @@ void Runtime::CreateOrtBackend() {
 
 void Runtime::CreateTrtBackend() {
 #ifdef ENABLE_TRT_BACKEND
-  option.trt_option.model_file = option.model_file;
-  option.trt_option.params_file = option.params_file;
-  option.trt_option.model_format = option.model_format;
-  option.trt_option.gpu_id = option.device_id;
-  option.trt_option.enable_pinned_memory = option.enable_pinned_memory;
-  option.trt_option.external_stream_ = option.external_stream_;
   backend_ = utils::make_unique<TrtBackend>();
   FDASSERT(backend_->Init(option), "Failed to initialize TensorRT backend.");
 #else