Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Backend] Add SetTrtMaxBatchSize function for TensorRT. (#526)
* Improve OCR Readme
* Improve OCR Readme
* Improve OCR Readme
* Improve OCR Readme
* Improve OCR Readme
* Add Initialize function to PP-OCR
* Add Initialize function to PP-OCR
* Add Initialize function to PP-OCR
* Make all the model links come from PaddleOCR
* Improve OCR readme
* Improve OCR readme
* Improve OCR readme
* Improve OCR readme
* Add Readme for vision results
* Add Readme for vision results
* Add Readme for vision results
* Add Readme for vision results
* Add Readme for vision results
* Add Readme for vision results
* Add Readme for vision results
* Add Readme for vision results
* Add Readme for vision results
* Add Readme for vision results
* Add check for label file in postprocess of Rec model
* Add check for label file in postprocess of Rec model
* Add check for label file in postprocess of Rec model
* Add check for label file in postprocess of Rec model
* Add check for label file in postprocess of Rec model
* Add check for label file in postprocess of Rec model
* Add comments to create API docs
* Improve OCR comments
* Rename OCR and add comments
* Make sure previous python example works
* Make sure previous python example works
* Fix Rec model bug
* Fix Rec model bug
* Fix rec model bug
* Add SetTrtMaxBatchSize function for TensorRT
* Add SetTrtMaxBatchSize Pybind
* Add set_trt_max_batch_size python function

Co-authored-by: Jason <jiangjiajun@baidu.com>
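In user code, the new option is set on a RuntimeOption before the runtime or model is created. A minimal sketch, not part of this commit, assuming the existing fastdeploy Python entry points use_gpu and use_trt_backend, with a batch size of 8 chosen only for illustration:

    import fastdeploy as fd

    option = fd.RuntimeOption()
    option.use_gpu(0)                           # run on GPU 0
    option.use_trt_backend()                    # select the TensorRT backend
    option.set_trt_max_workspace_size(1 << 30)  # existing API, default 1<<30
    option.set_trt_max_batch_size(8)            # new API added by this PR; default stays 32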
@@ -36,7 +36,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
       FDWARNING << "Detect that tensorrt cache file has been set to " << option.trt_option.serialize_file << ", but while enable paddle2trt, please notice that the cache file will save to the directory where paddle model saved." << std::endl;
       use_static = true;
     }
-    config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, 32, 3, precision, use_static);
+    config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, option.trt_option.max_batch_size, 3, precision, use_static);
     SetTRTDynamicShapeToConfig(option);
 #else
     FDWARNING << "The FastDeploy is not compiled with TensorRT backend, so will fallback to GPU with Paddle Inference Backend." << std::endl;
@@ -112,8 +112,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_file,
       FDWARNING << "Detect that tensorrt cache file has been set to " << option.trt_option.serialize_file << ", but while enable paddle2trt, please notice that the cache file will save to the directory where paddle model saved." << std::endl;
       use_static = true;
     }
-    config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, 32, 3, paddle_infer::PrecisionType::kInt8, use_static, false);
+    config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, option.trt_option.max_batch_size, 3, paddle_infer::PrecisionType::kInt8, use_static, false);
+    SetTRTDynamicShapeToConfig(option);

 #endif
   }
 }
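The two hunks above stop hardcoding a batch size of 32 and instead forward option.trt_option.max_batch_size to Paddle Inference's EnableTensorRtEngine, for both the default-precision and the INT8 paths. A hedged sketch of how this path would be driven from Python (enable_paddle_to_trt is the existing switch bound further below; the batch size value is illustrative):

    import fastdeploy as fd

    option = fd.RuntimeOption()
    option.use_gpu(0)
    option.use_trt_backend()
    option.enable_paddle_to_trt()       # run TensorRT through Paddle Inference (paddle2trt)
    option.set_trt_max_batch_size(16)   # now reaches config_.EnableTensorRtEngine(...)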
@@ -42,6 +42,7 @@ void BindRuntime(pybind11::module& m) {
       .def("set_lite_power_mode", &RuntimeOption::SetLitePowerMode)
       .def("set_trt_input_shape", &RuntimeOption::SetTrtInputShape)
       .def("set_trt_max_workspace_size", &RuntimeOption::SetTrtMaxWorkspaceSize)
+      .def("set_trt_max_batch_size", &RuntimeOption::SetTrtMaxBatchSize)
       .def("enable_paddle_to_trt", &RuntimeOption::EnablePaddleToTrt)
       .def("enable_trt_fp16", &RuntimeOption::EnableTrtFP16)
       .def("disable_trt_fp16", &RuntimeOption::DisableTrtFP16)
@@ -371,6 +371,9 @@ void RuntimeOption::SetTrtInputShape(const std::string& input_name,
 void RuntimeOption::SetTrtMaxWorkspaceSize(size_t max_workspace_size) {
   trt_max_workspace_size = max_workspace_size;
 }
+void RuntimeOption::SetTrtMaxBatchSize(size_t max_batch_size){
+  trt_max_batch_size = max_batch_size;
+}

 void RuntimeOption::EnableTrtFP16() { trt_enable_fp16 = true; }

@@ -200,6 +200,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
   /// Set max_workspace_size for TensorRT, default 1<<30
   void SetTrtMaxWorkspaceSize(size_t trt_max_workspace_size);

+  /// Set max_batch_size for TensorRT, default 32
+  void SetTrtMaxBatchSize(size_t max_batch_size);
+
   /**
    * @brief Enable FP16 inference while using TensorRT backend. Notice: not all the GPU device support FP16, on those device doesn't support FP16, FastDeploy will fallback to FP32 automaticly
    */
@@ -18,6 +18,7 @@ from . import ModelFormat
 from . import c_lib_wrap as C
+from . import rknpu2


 class Runtime:
     """FastDeploy Runtime object.
     """
@@ -207,10 +208,12 @@ class RuntimeOption:
         """
         return self._option.use_cpu()

-    def use_rknpu2(self,rknpu2_name=rknpu2.CpuName.RK3588,rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0):
+    def use_rknpu2(self,
+                   rknpu2_name=rknpu2.CpuName.RK3588,
+                   rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0):
         """Inference with CPU
         """
-        return self._option.use_rknpu2(rknpu2_name,rknpu2_core)
+        return self._option.use_rknpu2(rknpu2_name, rknpu2_core)

     def set_cpu_thread_num(self, thread_num=-1):
         """Set number of threads if inference with CPU
@@ -344,6 +347,11 @@ class RuntimeOption:
         """
         return self._option.set_trt_max_workspace_size(trt_max_workspace_size)

+    def set_trt_max_batch_size(self, trt_max_batch_size):
+        """Set max batch size while using TensorRT backend.
+        """
+        return self._option.set_trt_max_batch_size(trt_max_batch_size)
+
     def enable_paddle_trt_collect_shape(self):
         return self._option.enable_paddle_trt_collect_shape()

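Combined with the existing set_trt_input_shape binding, the new Python method covers both the static batch limit and dynamic input shapes. A sketch under assumptions: the input tensor name "x", the shape values, and the model paths are all hypothetical placeholders:

    import fastdeploy as fd

    option = fd.RuntimeOption()
    option.set_model_path("model.pdmodel", "model.pdiparams")   # hypothetical paths
    option.use_gpu(0)
    option.use_trt_backend()
    option.set_trt_max_batch_size(32)                     # matches the documented default
    option.set_trt_input_shape("x", [1, 3, 224, 224],     # min shape
                               [8, 3, 224, 224],          # opt shape
                               [32, 3, 224, 224])         # max shape
    runtime = fd.Runtime(option)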