diff --git a/fastdeploy/backends/paddle/paddle_backend.cc b/fastdeploy/backends/paddle/paddle_backend.cc
index 70d8305c5..c1ecacee2 100644
--- a/fastdeploy/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/backends/paddle/paddle_backend.cc
@@ -36,7 +36,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
       FDWARNING << "Detect that tensorrt cache file has been set to " << option.trt_option.serialize_file << ", but while enable paddle2trt, please notice that the cache file will save to the directory where paddle model saved." << std::endl;
       use_static = true;
     }
-    config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, 32, 3, precision, use_static);
+    config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, option.trt_option.max_batch_size, 3, precision, use_static);
     SetTRTDynamicShapeToConfig(option);
 #else
     FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so will fallback to GPU with Paddle Inference Backend." << std::endl;
@@ -112,8 +112,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
       FDWARNING << "Detect that tensorrt cache file has been set to " << option.trt_option.serialize_file << ", but while enable paddle2trt, please notice that the cache file will save to the directory where paddle model saved." << std::endl;
       use_static = true;
     }
-    config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, 32, 3, paddle_infer::PrecisionType::kInt8, use_static, false);
+    config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, option.trt_option.max_batch_size, 3, paddle_infer::PrecisionType::kInt8, use_static, false);
     SetTRTDynamicShapeToConfig(option);
+
 #endif
   }
 }
diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc
index 11cf9bf4e..759c55530 100644
--- a/fastdeploy/pybind/runtime.cc
+++ b/fastdeploy/pybind/runtime.cc
@@ -42,6 +42,7 @@ void BindRuntime(pybind11::module& m) {
       .def("set_lite_power_mode", &RuntimeOption::SetLitePowerMode)
      .def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
      .def("set_trt_max_workspace_size", &RuntimeOption::SetTrtMaxWorkspaceSize)
+      .def("set_trt_max_batch_size", &RuntimeOption::SetTrtMaxBatchSize)
      .def("enable_paddle_to_trt", &RuntimeOption::EnablePaddleToTrt)
      .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
      .def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc
index 94ea9de0b..4dd1bac59 100755
--- a/fastdeploy/runtime.cc
+++ b/fastdeploy/runtime.cc
@@ -371,6 +371,9 @@ void RuntimeOption::SetTrtInputShape(const std::string& input_name,
 void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) {
   trt_max_workspace_size = max_workspace_size;
 }
+void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size) {
+  trt_max_batch_size = max_batch_size;
+}
 
 void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }
 
diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h
index 7ab6f1fb2..8330f412e 100644
--- a/fastdeploy/runtime.h
+++ b/fastdeploy/runtime.h
@@ -200,6 +200,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
   /// Set max_workspace_size for TensorRT, default 1<<30
   void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);
 
+  /// Set max_batch_size for TensorRT, default 32
+  void SetTrtMaxBatchSize(size_t max_batch_size);
+
   /**
    * @brief Enable FP16 inference while using TensorRT backend. Notice: not all the GPU device support FP16, on those device doesn't support FP16, FastDeploy will fallback to FP32 automaticly
    */
@@ -339,7 +342,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
   std::string model_file = "";   // Path of model file
   std::string params_file = "";  // Path of parameters file, can be empty
   // format of input model
-  ModelFormat model_format = ModelFormat::AUTOREC; 
+  ModelFormat model_format = ModelFormat::AUTOREC;
 };
 
 /*! @brief Runtime object used to inference the loaded model on different devices
diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py
index e8a6058a4..4d0311d4b 100755
--- a/python/fastdeploy/runtime.py
+++ b/python/fastdeploy/runtime.py
@@ -18,6 +18,7 @@ from . import ModelFormat
 from . import c_lib_wrap as C
 from . import rknpu2
 
+
 class Runtime:
     """FastDeploy Runtime object.
     """
@@ -207,10 +208,12 @@ class RuntimeOption:
         """
         return self._option.use_cpu()
 
-    def use_rknpu2(self,rknpu2_name=rknpu2.CpuName.RK3588,rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0):
+    def use_rknpu2(self,
+                   rknpu2_name=rknpu2.CpuName.RK3588,
+                   rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0):
         """Inference with CPU
         """
-        return self._option.use_rknpu2(rknpu2_name,rknpu2_core)
+        return self._option.use_rknpu2(rknpu2_name, rknpu2_core)
 
     def set_cpu_thread_num(self, thread_num=-1):
         """Set number of threads if inference with CPU
@@ -344,6 +347,11 @@ class RuntimeOption:
         """
         return self._option.set_trt_max_workspace_size(trt_max_workspace_size)
 
+    def set_trt_max_batch_size(self, trt_max_batch_size):
+        """Set max batch size while using TensorRT backend.
+        """
+        return self._option.set_trt_max_batch_size(trt_max_batch_size)
+
     def enable_paddle_trt_collect_shape(self):
         return self._option.enable_paddle_trt_collect_shape()
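For reviewers, a minimal Python sketch of how the new option is intended to be used. The model paths, the input tensor name "x", and the shapes below are placeholders; apart from the new set_trt_max_batch_size, the other RuntimeOption calls already exist in FastDeploy:

    import fastdeploy as fd

    option = fd.RuntimeOption()
    option.set_model_path("model.pdmodel", "model.pdiparams")  # placeholder paths
    option.use_gpu(0)
    option.use_trt_backend()
    # Placeholder tensor name/shapes for the TensorRT dynamic-shape profile
    option.set_trt_input_shape("x", [1, 3, 224, 224], [8, 3, 224, 224], [16, 3, 224, 224])
    option.set_trt_max_workspace_size(1 << 30)
    # New in this patch: configurable max batch size instead of the previous hard-coded 32
    option.set_trt_max_batch_size(16)

    runtime = fd.Runtime(option)

Passing option.trt_option.max_batch_size through to EnableTensorRtEngine is what replaces the hard-coded 32 in paddle_backend.cc, so the value set here now reaches the Paddle-TRT engine.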