[Backend] Add fixed size optimization for transformer model (#1430)

Add enable_fixed_size_opt flag
Author: Jack Zhou
Date: 2023-02-24 09:45:04 +08:00
Committed by: GitHub
Parent: d3845eb4e1
Commit: 524c85745b

3 changed files with 23 additions and 7 deletions


@@ -66,6 +66,8 @@ struct PaddleBackendOption {
   int mkldnn_cache_size = -1;
   /// initialize memory size(MB) for GPU
   int gpu_mem_init_size = 100;
+  /// The option to enable fixed size optimization for transformer model
+  bool enable_fixed_size_opt = false;
   /// Disable type of operators run on TensorRT
   void DisableTrtOps(const std::vector<std::string>& ops) {
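
The flag defaults to false and is wired through the rest of this commit. As a rough C++ usage sketch (not part of this diff): RuntimeOption's paddle_infer_option member is referenced later in this commit, while UseGpu and UsePaddleInferBackend are assumed helpers from the surrounding FastDeploy codebase.

// Sketch only; assumes fastdeploy/runtime.h and the RuntimeOption
// helpers named below exist as in the surrounding codebase.
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseGpu(0);                // run on GPU 0 (assumed helper)
  option.UsePaddleInferBackend();  // select the Paddle Inference backend (assumed helper)
  // The flag added by this commit; it defaults to false.
  option.paddle_infer_option.enable_fixed_size_opt = true;
  return 0;
}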


@@ -36,6 +36,8 @@ void BindPaddleOption(pybind11::module& m) {
   BindIpuOption(m);
   pybind11::class_<PaddleBackendOption>(m, "PaddleBackendOption")
       .def(pybind11::init())
+      .def_readwrite("enable_fixed_size_opt",
+                     &PaddleBackendOption::enable_fixed_size_opt)
       .def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info)
       .def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn)
       .def_readwrite("enable_trt", &PaddleBackendOption::enable_trt)

fastdeploy/runtime/backends/paddle/paddle_backend.cc (Executable file → Normal file)

@@ -58,6 +58,10 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
           option.trt_option.max_batch_size, 3,
           precision, use_static);
       SetTRTDynamicShapeToConfig(option);
+      if (option_.enable_fixed_size_opt) {
+        paddle_infer::experimental::InternalUtils::SetTransformerMaskid(
+            &config_, "opt");
+      }
     }
   } else if (option.device == Device::IPU) {
 #ifdef WITH_IPU
@@ -99,28 +103,36 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
 }
 bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
-  if (!(Supported(runtime_option.model_format, Backend::PDINFER) && Supported(runtime_option.device, Backend::PDINFER))) {
+  if (!(Supported(runtime_option.model_format, Backend::PDINFER) &&
+        Supported(runtime_option.device, Backend::PDINFER))) {
     return false;
   }
   auto option = runtime_option;
   option.paddle_infer_option.model_file = runtime_option.model_file;
   option.paddle_infer_option.params_file = runtime_option.params_file;
-  option.paddle_infer_option.model_from_memory_ = runtime_option.model_from_memory_;
+  option.paddle_infer_option.model_from_memory_ =
+      runtime_option.model_from_memory_;
   option.paddle_infer_option.device = runtime_option.device;
   option.paddle_infer_option.device_id = runtime_option.device_id;
-  option.paddle_infer_option.enable_pinned_memory = runtime_option.enable_pinned_memory;
+  option.paddle_infer_option.enable_pinned_memory =
+      runtime_option.enable_pinned_memory;
   option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
   option.paddle_infer_option.trt_option = runtime_option.trt_option;
   option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
   if (option.model_from_memory_) {
-    return InitFromPaddle(option.model_file, option.params_file, option.paddle_infer_option);
+    return InitFromPaddle(option.model_file, option.params_file,
+                          option.paddle_infer_option);
   } else {
     std::string model_buffer = "";
     std::string params_buffer = "";
-    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), "Failed to read model file from %s.", option.model_file.c_str());
-    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), "Failed to read parameters file from %s.", option.params_file.c_str());
-    return InitFromPaddle(model_buffer, params_buffer, option.paddle_infer_option);
+    FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+             "Failed to read model file from %s.", option.model_file.c_str());
+    FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
+             "Failed to read parameters file from %s.",
+             option.params_file.c_str());
+    return InitFromPaddle(model_buffer, params_buffer,
+                          option.paddle_infer_option);
   }
   return false;
 }
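
Taken together, the backend is selected and initialized through the Runtime front end. A hedged end-to-end sketch; SetModelPath, Runtime::Init, and the model paths are assumptions beyond what this diff shows:

// Usage sketch, not from this commit; assumes the FastDeploy Runtime API.
#include <iostream>
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // hypothetical paths
  option.UseGpu(0);
  option.UsePaddleInferBackend();
  option.paddle_infer_option.enable_fixed_size_opt = true;  // flag added by this commit

  fastdeploy::Runtime runtime;
  // Routes to PaddleBackend::Init above; returns false when the model
  // format or device is not supported by the Paddle Inference backend.
  if (!runtime.Init(option)) {
    std::cerr << "Failed to initialize runtime." << std::endl;
    return 1;
  }
  return 0;
}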