diff --git a/fastdeploy/runtime/backends/paddle/option.h b/fastdeploy/runtime/backends/paddle/option.h index 652b72401..6802b8da4 100755 --- a/fastdeploy/runtime/backends/paddle/option.h +++ b/fastdeploy/runtime/backends/paddle/option.h @@ -66,6 +66,8 @@ struct PaddleBackendOption { int mkldnn_cache_size = -1; /// initialize memory size(MB) for GPU int gpu_mem_init_size = 100; + /// The option to enable fixed size optimization for transformer model + bool enable_fixed_size_opt = false; /// Disable type of operators run on TensorRT void DisableTrtOps(const std::vector<std::string>& ops) { diff --git a/fastdeploy/runtime/backends/paddle/option_pybind.cc b/fastdeploy/runtime/backends/paddle/option_pybind.cc index 50b34ca61..60b66e672 100644 --- a/fastdeploy/runtime/backends/paddle/option_pybind.cc +++ b/fastdeploy/runtime/backends/paddle/option_pybind.cc @@ -36,6 +36,8 @@ void BindPaddleOption(pybind11::module& m) { BindIpuOption(m); pybind11::class_<PaddleBackendOption>(m, "PaddleBackendOption") .def(pybind11::init()) + .def_readwrite("enable_fixed_size_opt", + &PaddleBackendOption::enable_fixed_size_opt) .def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info) .def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn) .def_readwrite("enable_trt", &PaddleBackendOption::enable_trt) diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc old mode 100755 new mode 100644 index 1fc45e990..bfe122f97 --- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc +++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc @@ -58,6 +58,10 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) { option.trt_option.max_batch_size, 3, precision, use_static); SetTRTDynamicShapeToConfig(option); + if (option_.enable_fixed_size_opt) { + paddle_infer::experimental::InternalUtils::SetTransformerMaskid( + &config_, "opt"); + } } } else if (option.device == Device::IPU) { #ifdef WITH_IPU @@ -99,28 +103,36 @@ void 
PaddleBackend::BuildOption(const PaddleBackendOption& option) { } bool PaddleBackend::Init(const RuntimeOption& runtime_option) { - if (!(Supported(runtime_option.model_format, Backend::PDINFER) && Supported(runtime_option.device, Backend::PDINFER))) { + if (!(Supported(runtime_option.model_format, Backend::PDINFER) && + Supported(runtime_option.device, Backend::PDINFER))) { return false; } auto option = runtime_option; option.paddle_infer_option.model_file = runtime_option.model_file; option.paddle_infer_option.params_file = runtime_option.params_file; - option.paddle_infer_option.model_from_memory_ = runtime_option.model_from_memory_; + option.paddle_infer_option.model_from_memory_ = + runtime_option.model_from_memory_; option.paddle_infer_option.device = runtime_option.device; option.paddle_infer_option.device_id = runtime_option.device_id; - option.paddle_infer_option.enable_pinned_memory = runtime_option.enable_pinned_memory; + option.paddle_infer_option.enable_pinned_memory = + runtime_option.enable_pinned_memory; option.paddle_infer_option.external_stream_ = runtime_option.external_stream_; option.paddle_infer_option.trt_option = runtime_option.trt_option; option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id; if (option.model_from_memory_) { - return InitFromPaddle(option.model_file, option.params_file, option.paddle_infer_option); + return InitFromPaddle(option.model_file, option.params_file, + option.paddle_infer_option); } else { std::string model_buffer = ""; std::string params_buffer = ""; - FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), "Failed to read model file from %s.", option.model_file.c_str()); - FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), "Failed to read parameters file from %s.", option.params_file.c_str()); - return InitFromPaddle(model_buffer, params_buffer, option.paddle_infer_option); + FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), + "Failed to read model file from %s.", 
option.model_file.c_str()); + FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer), + "Failed to read parameters file from %s.", + option.params_file.c_str()); + return InitFromPaddle(model_buffer, params_buffer, + option.paddle_infer_option); } return false; }