[Backend] Add fixed size optimization for transformer model (#1430)
Add an enable_fixed_size_opt flag to PaddleBackendOption, expose it in the Python bindings, and, when the flag is set, call paddle_infer::experimental::InternalUtils::SetTransformerMaskid on the predictor config in PaddleBackend::BuildOption.
@@ -66,6 +66,8 @@ struct PaddleBackendOption {
   int mkldnn_cache_size = -1;
   /// initialize memory size(MB) for GPU
   int gpu_mem_init_size = 100;
+  /// The option to enable fixed size optimization for transformer model
+  bool enable_fixed_size_opt = false;
 
   /// Disable type of operators run on TensorRT
   void DisableTrtOps(const std::vector<std::string>& ops) {
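Note: the flag defaults to false, so existing pipelines are unchanged unless they opt in. Below is a minimal sketch of how the option could be enabled from C++ through FastDeploy's RuntimeOption. The paddle_infer_option member and the enable_trt / enable_fixed_size_opt fields come from this diff; the model paths and the SetModelPath / UseGpu / UsePaddleInferBackend calls are assumed FastDeploy APIs used only for illustration.

#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  // Hypothetical model files; any Paddle-format transformer model would do.
  option.SetModelPath("model.pdmodel", "model.pdiparams");
  option.UseGpu(0);                // the optimization is wired into the GPU/TensorRT path
  option.UsePaddleInferBackend();  // select the Paddle Inference backend
  option.paddle_infer_option.enable_trt = true;
  // Opt in to the fixed-size transformer optimization added by this commit.
  option.paddle_infer_option.enable_fixed_size_opt = true;

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }
  return 0;
}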
@@ -36,6 +36,8 @@ void BindPaddleOption(pybind11::module& m) {
   BindIpuOption(m);
   pybind11::class_<PaddleBackendOption>(m, "PaddleBackendOption")
       .def(pybind11::init())
+      .def_readwrite("enable_fixed_size_opt",
+                     &PaddleBackendOption::enable_fixed_size_opt)
       .def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info)
       .def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn)
       .def_readwrite("enable_trt", &PaddleBackendOption::enable_trt)
fastdeploy/runtime/backends/paddle/paddle_backend.cc (26 changed lines, Executable file → Normal file)
@@ -58,6 +58,10 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
                                    option.trt_option.max_batch_size, 3,
                                    precision, use_static);
       SetTRTDynamicShapeToConfig(option);
+      if (option_.enable_fixed_size_opt) {
+        paddle_infer::experimental::InternalUtils::SetTransformerMaskid(
+            &config_, "opt");
+      }
     }
   } else if (option.device == Device::IPU) {
 #ifdef WITH_IPU
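Side note: the new branch simply forwards the flag to Paddle Inference's experimental API on the predictor config. A rough standalone sketch of the equivalent raw Paddle Inference setup follows; only the SetTransformerMaskid(&config, "opt") call is taken from this diff, while the header name, model paths, and TensorRT parameters are assumptions for illustration.

#include "paddle_inference_api.h"  // assumed public Paddle Inference header

#include <memory>
#include <string>

std::shared_ptr<paddle_infer::Predictor> BuildTransformerPredictor(
    const std::string& model_file, const std::string& params_file) {
  paddle_infer::Config config;
  config.SetModel(model_file, params_file);
  config.EnableUseGpu(100, 0);  // 100 MB initial GPU memory pool on device 0
  // Illustrative TensorRT settings: 1 GB workspace, max batch 32, FP32,
  // no serialized engine cache, no INT8 calibration.
  config.EnableTensorRtEngine(1 << 30, 32, 3,
                              paddle_infer::PrecisionType::kFloat32,
                              /*use_static=*/false, /*use_calib_mode=*/false);
  // The call this commit adds behind enable_fixed_size_opt: point Paddle
  // Inference at the "opt" transformer mask id so the fixed-size optimization
  // can take effect.
  paddle_infer::experimental::InternalUtils::SetTransformerMaskid(&config, "opt");
  return paddle_infer::CreatePredictor(config);
}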
@@ -99,28 +103,36 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
 }
 
 bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
-  if (!(Supported(runtime_option.model_format, Backend::PDINFER) && Supported(runtime_option.device, Backend::PDINFER))) {
+  if (!(Supported(runtime_option.model_format, Backend::PDINFER) &&
+        Supported(runtime_option.device, Backend::PDINFER))) {
     return false;
   }
 
   auto option = runtime_option;
   option.paddle_infer_option.model_file = runtime_option.model_file;
   option.paddle_infer_option.params_file = runtime_option.params_file;
-  option.paddle_infer_option.model_from_memory_ = runtime_option.model_from_memory_;
+  option.paddle_infer_option.model_from_memory_ =
+      runtime_option.model_from_memory_;
   option.paddle_infer_option.device = runtime_option.device;
   option.paddle_infer_option.device_id = runtime_option.device_id;
-  option.paddle_infer_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
+  option.paddle_infer_option.enable_pinned_memory =
+      runtime_option.enable_pinned_memory;
   option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
   option.paddle_infer_option.trt_option = runtime_option.trt_option;
   option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
   if (option.model_from_memory_) {
-    return InitFromPaddle(option.model_file, option.params_file, option.paddle_infer_option);
+    return InitFromPaddle(option.model_file, option.params_file,
+                          option.paddle_infer_option);
   } else {
     std::string model_buffer = "";
     std::string params_buffer = "";
-    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), "Failed to read model file from %s.", option.model_file.c_str());
-    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), "Failed to read parameters file from %s.", option.params_file.c_str());
-    return InitFromPaddle(model_buffer, params_buffer, option.paddle_infer_option);
+    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+             "Failed to read model file from %s.", option.model_file.c_str());
+    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
+             "Failed to read parameters file from %s.",
+             option.params_file.c_str());
+    return InitFromPaddle(model_buffer, params_buffer,
+                          option.paddle_infer_option);
   }
   return false;
 }