From 8e13a38bf53dada691fda6ef9b3a6a4829a3beef Mon Sep 17 00:00:00 2001
From: WJJ1995
Date: Sun, 11 Dec 2022 14:16:37 +0800
Subject: [PATCH] [Backend] Modify trt_max_batch_size default value to 1 (#817)

* add onnx_ort_runtime demo
* rm in requirements
* support batch eval
* fixed MattingResults bug
* move assignment for DetectionResult
* integrated x2paddle
* add model convert readme
* update readme
* re-lint
* add processor api
* Add MattingResult Free
* change valid_cpu_backends order
* add ppocr benchmark
* mv bs from 64 to 32
* fixed quantize.md
* fixed quantize bugs
* Add Monitor for benchmark
* update mem monitor
* Set trt_max_batch_size default 1

Co-authored-by: Jason
---
 benchmark/convert_info.py | 10 ++++------
 fastdeploy/runtime.h      |  2 +-
 2 files changed, 5 insertions(+), 7 deletions(-)
 mode change 100644 => 100755 fastdeploy/runtime.h

diff --git a/benchmark/convert_info.py b/benchmark/convert_info.py
index 338a0cec5..e5942e1f7 100644
--- a/benchmark/convert_info.py
+++ b/benchmark/convert_info.py
@@ -70,10 +70,8 @@ for i in range(line_nums):
         cpu_rss_mb_list = cpu_rss_mb_ori.split(".")
         cpu_rss_mb = cpu_rss_mb_list[0] + "." + cpu_rss_mb_list[1][:2]
     if "gpu_rss_mb" in lines[i + 4]:
-        gpu_rss_mb_ori = lines[i + 4].split(": ")[1]
-        # two decimal places
-        gpu_rss_mb_list = gpu_rss_mb_ori.split(".")
-        gpu_rss_mb = gpu_rss_mb_list[0] + "." + gpu_rss_mb_list[1][:2]
+        gpu_rss_mb_ori = lines[i + 4].split(": ")[1].strip()
+        gpu_rss_mb = str(gpu_rss_mb_ori) + ".0"
     if "ort_cpu_1" in lines[i]:
         ort_cpu_thread1[
             model_name] = runtime + "\t" + end2end + "\t" + cpu_rss_mb
@@ -111,7 +109,7 @@ for i in range(line_nums):
 
 f2 = open("struct_cpu_" + domain + ".txt", "w")
 f2.writelines(
-    "model_name\tthread_nums\tort_run\tort_end2end\tcpu_rss_mb\tov_run\tov_end2end\tcpu_rss_mb\tpaddle_run\tpaddle_end2end\tcpu_rss_mb\n"
+    "model_name\tthread_nums\tort_run\tort_end2end\tcpu_mem\tov_run\tov_end2end\tcpu_mem\tpaddle_run\tpaddle_end2end\tcpu_mem\n"
 )
 for model_name in model_name_set:
     lines1 = model_name + '\t1\t'
@@ -148,7 +146,7 @@ f2.close()
 
 f3 = open("struct_gpu_" + domain + ".txt", "w")
 f3.writelines(
-    "model_name\tort_run\tort_end2end\tgpu_rss_mb\tpaddle_run\tpaddle_end2end\tgpu_rss_mb\tpaddle_trt_run\tpaddle_trt_end2end\tgpu_rss_mb\tpaddle_trt_fp16_run\tpaddle_trt_fp16_end2end\tgpu_rss_mb\ttrt_run\ttrt_end2end\tgpu_rss_mb\ttrt_fp16_run\ttrt_fp16_end2end\tgpu_rss_mb\n"
+    "model_name\tort_run\tort_end2end\tgpu_mem\tpaddle_run\tpaddle_end2end\tgpu_mem\tpaddle_trt_run\tpaddle_trt_end2end\tgpu_mem\tpaddle_trt_fp16_run\tpaddle_trt_fp16_end2end\tgpu_mem\ttrt_run\ttrt_end2end\tgpu_mem\ttrt_fp16_run\ttrt_fp16_end2end\tgpu_mem\n"
 )
 for model_name in model_name_set:
     lines1 = model_name + '\t'
diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h
old mode 100644
new mode 100755
index e53c7ca1e..9c22c929f
--- a/fastdeploy/runtime.h
+++ b/fastdeploy/runtime.h
@@ -362,7 +362,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
   std::string trt_serialize_file = "";
   bool trt_enable_fp16 = false;
   bool trt_enable_int8 = false;
-  size_t trt_max_batch_size = 32;
+  size_t trt_max_batch_size = 1;
   size_t trt_max_workspace_size = 1 << 30;
   // ======Only for PaddleTrt Backend=======
   std::vector<std::string> trt_disabled_ops_{};
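
With trt_max_batch_size now defaulting to 1, TensorRT engines are built for single-sample
input unless the caller raises the limit explicitly. A minimal C++ sketch of how a user
could restore a larger limit; it relies on the public RuntimeOption fields shown in the
runtime.h hunk above, while UseGpu()/UseTrtBackend() are assumed helpers from the
FastDeploy C++ API and the values are illustrative only:

    #include "fastdeploy/runtime.h"

    int main() {
      fastdeploy::RuntimeOption option;
      option.UseGpu(0);                         // run on GPU 0
      option.UseTrtBackend();                   // select the TensorRT backend
      option.trt_max_batch_size = 32;           // default is now 1; raise it for batched inference
      option.trt_max_workspace_size = 1 << 30;  // 1 GiB workspace, same as the header default
      // pass `option` to a model constructor or a fastdeploy::Runtime before Init()
      return 0;
    }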