[Benchmark] Add run_benchmark_cpu.sh (#1465)

* add GPL lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint * rm pmap and use mem api * rm pmap and use mem api * add mem api * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * deal with comments * fixed enable_paddle_to_trt * add log for paddle_trt * support ppcls benchmark * use new trt option api * update benchmark info * simplify benchmark.cc * simplify benchmark.cc * deal with comments * Add ppseg && ppocr benchmark * add OCR rec img * add ocr benchmark * fixed trt shape * add trt shape * resolve conflict * add ENABLE_BENCHMARK define * Add ClassifyDiff * Add Resize for ClassifyResult * deal with comments * add convert info script * resolve conflict * Add SaveBenchmarkResult func * fixed bug * fixed bug * fixed bug * add config.txt for option * fixed bug * fixed bug * fixed bug * add benchmark.sh * mv thread_nums from 8 to 1 * deal with comments * deal with comments * fixed readme * deal with comments --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
2025-10-17 22:21:48 +08:00 · 2023-03-02 19:11:21 +08:00
parent a157da17a4
commit 99b1bc5d6e
17 changed files with 432 additions and 353 deletions
--- a/benchmark/cpp/CMakeLists.txt
+++ b/benchmark/cpp/CMakeLists.txt
@@ -11,7 +11,6 @@ include_directories(${FASTDEPLOY_INCS})
 add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
 add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)
 add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc)
-add_executable(benchmark_precision_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_precision_ppyolov8.cc)
 add_executable(benchmark_ppseg ${PROJECT_SOURCE_DIR}/benchmark_ppseg.cc)
 add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc)
 add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc)
@@ -21,7 +20,6 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
  target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread)
-  target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread)
  target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread)
@@ -30,7 +28,6 @@ else()
  target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags)
-  target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags)
  target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
--- a/benchmark/cpp/README.md
+++ b/benchmark/cpp/README.md
@@ -11,26 +11,37 @@

 运行FastDeploy C++ Benchmark，需先准备好相应的环境，并在ENABLE_BENCHMARK=ON模式下从源码编译FastDeploy C++ SDK. 以下将按照硬件维度，来说明相应的系统环境要求。不同环境下的详细要求，请参考[FastDeploy环境要求](../../docs/cn/build_and_install)  

-## 2. Benchmark 参数设置说明  
+## 2. Benchmark 设置说明  

-<div id="参数设置说明"></div>  
+具体flags.h提供选项如下:

+<div id="选项设置说明"></div>  

-| 参数                 | 作用                                        |
+| 选项                 | 作用                                        |
 | -------------------- | ------------------------------------------ |
 | --model              | 模型路径                                     |
 | --image              | 图片路径    |
-| --device             | 选择 CPU/GPU/XPU，默认为 CPU  |
-| --cpu_thread_nums     | CPU 线程数，默认为 8      |
-| --device_id          | GPU/XPU 卡号，默认为 0 |
-| --warmup           | 跑benchmark的warmup次数，默认为 200 |
-| --repeat           | 跑benchmark的循环次数，默认为 1000 |  
-| --profile_mode      | 指定需要测试性能的模式，可选值为`[runtime, end2end]`，默认为 runtime |  
-| --include_h2d_d2h   | 是否把H2D+D2H的耗时统计在内，该参数只在profile_mode为runtime时有效，默认为 false |  
-| --backend            | 指定后端类型，有default, ort, ov, trt, paddle, paddle_trt, lite 等，为default时，会自动选择最优后端，推荐设置为显式设置明确的backend。默认为 default   |
-| --use_fp16    | 是否开启fp16，当前只对 trt, paddle-trt, lite后端有效，默认为 false |
-| --collect_memory_info    | 是否记录 cpu/gpu memory信息，默认 false  |
-| --sampling_interval    | 记录 cpu/gpu memory信息采样时间间隔，单位ms，默认为 50  |  
+| --config_path        | config.txt路径，包含具体设备、后端等信息  |
+
+具体config.txt包含信息含义如下:
+
+<div id="参数设置说明"></div>  
+
+| 参数                 | 作用                                        |
+| -------------------- | ------------------------------------------ |
+| device             | 选择 CPU/GPU/XPU，默认为 CPU  |
+| device_id          | GPU/XPU 卡号，默认为 0 |
+| cpu_thread_nums     | CPU 线程数，默认为 1      |
+| warmup           | 跑benchmark的warmup次数，默认为 200 |
+| repeat           | 跑benchmark的循环次数，默认为 1000 |
+| backend            | 指定后端类型，有default, ort, ov, trt, paddle, paddle_trt, lite 等，为default时，会自动选择最优后端，推荐显式指定明确的backend。默认为 default   |
+| profile_mode      | 指定需要测试性能的模式，可选值为`[runtime, end2end]`，默认为 runtime |
+| include_h2d_d2h   | 是否把H2D+D2H的耗时统计在内，该参数只在profile_mode为runtime时有效，默认为 false |  
+| use_fp16    | 是否开启fp16，当前只对 trt, paddle-trt, lite后端有效，默认为 false |
+| collect_memory_info    | 是否记录 cpu/gpu memory信息，默认 false  |
+| sampling_interval    | 记录 cpu/gpu memory信息采样时间间隔，单位ms，默认为 50  |
+| precision_compare    | 是否进行精度比较，默认为 false  |  
+| result_path    | 记录 Benchmark 数据的 txt 文件路径  |  

 ## 3. X86_64 CPU 和 NVIDIA GPU 环境下运行 Benchmark

@@ -93,41 +104,11 @@ tar -zxvf yolov8_s_500e_coco.tgz

 ```bash  

-# 统计性能  
-# CPU
-# Paddle Inference
-./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend paddle --profile_mode runtime
-
-# ONNX Runtime
-./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend ort --profile_mode runtime
-
-# OpenVINO
-./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend ov --profile_mode runtime
-
-# GPU
-# Paddle Inference
-./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle --profile_mode runtime --warmup 200 --repeat 2000
-
-# Paddle Inference + TensorRT
-./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle_trt --profile_mode runtime --warmup 200 --repeat 2000
-
-# Paddle Inference + TensorRT + FP16
-./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle --profile_mode runtime --warmup 200 --repeat 2000 --use_fp16
-
-# ONNX Runtime
-./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend ort --profile_mode runtime --warmup 200 --repeat 2000
-
-# TensorRT
-./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle --profile_mode runtime --warmup 200 --repeat 2000
-
-# TensorRT + FP16
-./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend trt --profile_mode runtime --warmup 200 --repeat 2000 --use_fp16
-
-# 统计内存显存占用  
-# 增加--collect_memory_info选项
-./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend paddle --profile_mode runtime --collect_memory_info
+# 统计性能，用户根据需求修改config.txt文件，具体含义参考上表
+# eg：如果想测paddle gpu backend，将device改为gpu，backend修改为paddle即可
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --config_path config.txt
 ```
-注意，为避免对性能统计产生影响，测试性能时，最好不要开启内存显存统计的功能，当指定--collect_memory_info参数时，只有内存显存参数是稳定可靠的。更多参数设置，请参考[参数设置说明](#参数设置说明)
+注意，为避免对性能统计产生影响，测试性能时，最好不要开启内存显存统计的功能，当把collect_memory_info参数设置为true时，只有内存显存参数是稳定可靠的。更多参数设置，请参考[参数设置说明](#参数设置说明)


 ## 4. ARM CPU 环境下运行 Benchmark
--- a/benchmark/cpp/benchmark.sh
+++ b/benchmark/cpp/benchmark.sh
@@ -0,0 +1,9 @@
+# Run all models on the hardware and backend specified in config.txt
+./benchmark_ppseg --model PP_HumanSegV2_Lite_192x192_with_argmax_infer --image portrait_heng.jpg --config_path config.txt
+./benchmark_ppseg --model PP_HumanSegV2_Mobile_192x192_with_argmax_infer --image portrait_heng.jpg --config_path config.txt
+./benchmark_ppcls --model MobileNetV2_ssld_infer --image ILSVRC2012_val_00000010.jpeg --config_path config.txt
+./benchmark_ppocr_det --model ch_PP-OCRv3_det_infer --image 12.jpg --config_path config.txt
+./benchmark_ppocr_cls --model ch_ppocr_mobile_v2.0_cls_infer --image rec_img.jpg --config_path config.txt
+./benchmark_ppocr_rec --model ch_PP-OCRv3_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path config.txt
+./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --config_path config.txt
+./benchmark_yolov5 --model yolov5s.onnx --image 000000014439.jpg --config_path config.txt
--- a/benchmark/cpp/benchmark_ppcls.cc
+++ b/benchmark/cpp/benchmark_ppcls.cc
@@ -27,8 +27,11 @@ int main(int argc, char* argv[]) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
+  std::unordered_map<std::string, std::string> config_info;
+  benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
+                                                &config_info);
  // Set max_batch_size 1 for best performance
-  if (FLAGS_backend == "paddle_trt") {
+  if (config_info["backend"] == "paddle_trt") {
    option.trt_option.max_batch_size = 1;
  }
  auto model_file = FLAGS_model + sep + "inference.pdmodel";
@@ -37,6 +40,7 @@ int main(int argc, char* argv[]) {
  auto model_ppcls = vision::classification::PaddleClasModel(
      model_file, params_file, config_file, option);
  vision::ClassifyResult res;
+  if (config_info["precision_compare"] == "true") {
    // Run once at least
    model_ppcls.Predict(im, &res);
    // 1. Test result diff
@@ -56,6 +60,7 @@ int main(int argc, char* argv[]) {
    std::cout << "Scores diff: mean=" << cls_diff.scores.mean
              << ", max=" << cls_diff.scores.max
              << ", min=" << cls_diff.scores.min << std::endl;
+  }
  BENCHMARK_MODEL(model_ppcls, model_ppcls.Predict(im, &res))
 #endif
  return 0;
--- a/benchmark/cpp/benchmark_ppocr_cls.cc
+++ b/benchmark/cpp/benchmark_ppocr_cls.cc
@@ -16,6 +16,13 @@
 #include "macros.h"
 #include "option.h"

+namespace vision = fastdeploy::vision;
+namespace benchmark = fastdeploy::benchmark;
+
+DEFINE_string(trt_shape, "1,3,48,10:4,3,48,320:8,3,48,1024",
+              "Set min/opt/max shape for trt/paddle_trt backend."
+              "eg:--trt_shape 1,3,48,10:4,3,48,320:8,3,48,1024");
+
 int main(int argc, char* argv[]) {
 #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
  // Initialization
@@ -24,20 +31,27 @@ int main(int argc, char* argv[]) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
+  std::unordered_map<std::string, std::string> config_info;
+  benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
+                                                &config_info);
  // Classification Model
  auto cls_model_file = FLAGS_model + sep + "inference.pdmodel";
  auto cls_params_file = FLAGS_model + sep + "inference.pdiparams";
-  if (FLAGS_backend == "paddle_trt") {
+  if (config_info["backend"] == "paddle_trt") {
    option.paddle_infer_option.collect_trt_shape = true;
  }
-  if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
-    option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
-                               {8, 3, 48, 1024});
+  if (config_info["backend"] == "paddle_trt" ||
+      config_info["backend"] == "trt") {
+    std::vector<std::vector<int32_t>> trt_shapes =
+        benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape);
+    option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1],
+                               trt_shapes[2]);
  }
-  auto model_ppocr_cls = fastdeploy::vision::ocr::Classifier(
-      cls_model_file, cls_params_file, option);
+  auto model_ppocr_cls =
+      vision::ocr::Classifier(cls_model_file, cls_params_file, option);
  int32_t res_label;
  float res_score;
+  if (config_info["precision_compare"] == "true") {
    // Run once at least
    model_ppocr_cls.Predict(im, &res_label, &res_score);
    // 1. Test result diff
@@ -50,6 +64,7 @@ int main(int argc, char* argv[]) {
    std::cout << "PPOCR Cls label diff: " << ppocr_cls_label_diff << std::endl;
    std::cout << "PPOCR Cls score diff: " << abs(ppocr_cls_score_diff)
              << std::endl;
+  }
  BENCHMARK_MODEL(model_ppocr_cls,
                  model_ppocr_cls.Predict(im, &res_label, &res_score));
 #endif
--- a/benchmark/cpp/benchmark_ppocr_det.cc
+++ b/benchmark/cpp/benchmark_ppocr_det.cc
@@ -19,6 +19,10 @@
 namespace vision = fastdeploy::vision;
 namespace benchmark = fastdeploy::benchmark;

+DEFINE_string(trt_shape, "1,3,64,64:1,3,640,640:1,3,960,960",
+              "Set min/opt/max shape for trt/paddle_trt backend."
+              "eg:--trt_shape 1,3,64,64:1,3,640,640:1,3,960,960");
+
 int main(int argc, char* argv[]) {
 #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
  // Initialization
@@ -27,19 +31,26 @@ int main(int argc, char* argv[]) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
+  std::unordered_map<std::string, std::string> config_info;
+  benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
+                                                &config_info);
  // Detection Model
  auto det_model_file = FLAGS_model + sep + "inference.pdmodel";
  auto det_params_file = FLAGS_model + sep + "inference.pdiparams";
-  if (FLAGS_backend == "paddle_trt") {
+  if (config_info["backend"] == "paddle_trt") {
    option.paddle_infer_option.collect_trt_shape = true;
  }
-  if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
-    option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
-                               {1, 3, 960, 960});
+  if (config_info["backend"] == "paddle_trt" ||
+      config_info["backend"] == "trt") {
+    std::vector<std::vector<int32_t>> trt_shapes =
+        benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape);
+    option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1],
+                               trt_shapes[2]);
  }
  auto model_ppocr_det =
      vision::ocr::DBDetector(det_model_file, det_params_file, option);
  std::vector<std::array<int, 8>> res;
+  if (config_info["precision_compare"] == "true") {
    // Run once at least
    model_ppocr_det.Predict(im, &res);
    // 1. Test result diff
@@ -57,6 +68,7 @@ int main(int argc, char* argv[]) {
    std::cout << "PPOCR Boxes diff: mean=" << ppocr_det_diff.boxes.mean
              << ", max=" << ppocr_det_diff.boxes.max
              << ", min=" << ppocr_det_diff.boxes.min << std::endl;
+  }
  BENCHMARK_MODEL(model_ppocr_det, model_ppocr_det.Predict(im, &res));
 #endif
  return 0;
--- a/benchmark/cpp/benchmark_ppocr_rec.cc
+++ b/benchmark/cpp/benchmark_ppocr_rec.cc
@@ -16,7 +16,13 @@
 #include "macros.h"
 #include "option.h"

+namespace vision = fastdeploy::vision;
+namespace benchmark = fastdeploy::benchmark;
+
 DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR.");
+DEFINE_string(trt_shape, "1,3,48,10:4,3,48,320:8,3,48,2304",
+              "Set min/opt/max shape for trt/paddle_trt backend."
+              "eg:--trt_shape 1,3,48,10:4,3,48,320:8,3,48,2304");

 int main(int argc, char* argv[]) {
 #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
@@ -26,20 +32,27 @@ int main(int argc, char* argv[]) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
+  std::unordered_map<std::string, std::string> config_info;
+  benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
+                                                &config_info);
  // Recognition Model
  auto rec_model_file = FLAGS_model + sep + "inference.pdmodel";
  auto rec_params_file = FLAGS_model + sep + "inference.pdiparams";
-  if (FLAGS_backend == "paddle_trt") {
+  if (config_info["backend"] == "paddle_trt") {
    option.paddle_infer_option.collect_trt_shape = true;
  }
-  if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
-    option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
-                               {8, 3, 48, 2304});
+  if (config_info["backend"] == "paddle_trt" ||
+      config_info["backend"] == "trt") {
+    std::vector<std::vector<int32_t>> trt_shapes =
+        benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape);
+    option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1],
+                               trt_shapes[2]);
  }
-  auto model_ppocr_rec = fastdeploy::vision::ocr::Recognizer(
+  auto model_ppocr_rec = vision::ocr::Recognizer(
      rec_model_file, rec_params_file, FLAGS_rec_label_file, option);
  std::string text;
  float rec_score;
+  if (config_info["precision_compare"] == "true") {
    // Run once at least
    model_ppocr_rec.Predict(im, &text, &rec_score);
    // 1. Test result diff
@@ -52,6 +65,7 @@ int main(int argc, char* argv[]) {
    std::cout << "PPOCR Rec text diff: " << ppocr_rec_text_diff << std::endl;
    std::cout << "PPOCR Rec score diff: " << abs(ppocr_rec_score_diff)
              << std::endl;
+  }
  BENCHMARK_MODEL(model_ppocr_rec,
                  model_ppocr_rec.Predict(im, &text, &rec_score));
 #endif
--- a/benchmark/cpp/benchmark_ppseg.cc
+++ b/benchmark/cpp/benchmark_ppseg.cc
@@ -19,6 +19,10 @@
 namespace vision = fastdeploy::vision;
 namespace benchmark = fastdeploy::benchmark;

+DEFINE_string(trt_shape, "1,3,192,192:1,3,192,192:1,3,192,192",
+              "Set min/opt/max shape for trt/paddle_trt backend."
+              "eg:--trt_shape 1,3,192,192:1,3,192,192:1,3,192,192");
+
 int main(int argc, char* argv[]) {
 #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
  // Initialization
@@ -27,19 +31,26 @@ int main(int argc, char* argv[]) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
+  std::unordered_map<std::string, std::string> config_info;
+  benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
+                                                &config_info);
  auto model_file = FLAGS_model + sep + "model.pdmodel";
  auto params_file = FLAGS_model + sep + "model.pdiparams";
  auto config_file = FLAGS_model + sep + "deploy.yaml";
-  if (FLAGS_backend == "paddle_trt") {
+  if (config_info["backend"] == "paddle_trt") {
    option.paddle_infer_option.collect_trt_shape = true;
  }
-  if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
-    option.trt_option.SetShape("x", {1, 3, 192, 192}, {1, 3, 192, 192},
-                               {1, 3, 192, 192});
+  if (config_info["backend"] == "paddle_trt" ||
+      config_info["backend"] == "trt") {
+    std::vector<std::vector<int32_t>> trt_shapes =
+        benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape);
+    option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1],
+                               trt_shapes[2]);
  }
  auto model_ppseg = vision::segmentation::PaddleSegModel(
      model_file, params_file, config_file, option);
  vision::SegmentationResult res;
+  if (config_info["precision_compare"] == "true") {
    // Run once at least
    model_ppseg.Predict(im, &res);
    // 1. Test result diff
@@ -62,6 +73,7 @@ int main(int argc, char* argv[]) {
                << ", max=" << seg_diff.scores.max
                << ", min=" << seg_diff.scores.min << std::endl;
    }
+  }
  BENCHMARK_MODEL(model_ppseg, model_ppseg.Predict(im, &res))
  auto vis_im = vision::VisSegmentation(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
--- a/benchmark/cpp/benchmark_ppyolov8.cc
+++ b/benchmark/cpp/benchmark_ppyolov8.cc
@@ -16,6 +16,11 @@
 #include "macros.h"
 #include "option.h"

+namespace vision = fastdeploy::vision;
+namespace benchmark = fastdeploy::benchmark;
+
+DEFINE_bool(no_nms, false, "Whether the model contains nms.");
+
 int main(int argc, char* argv[]) {
 #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
  // Initialization
@@ -24,16 +29,73 @@ int main(int argc, char* argv[]) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
+  std::unordered_map<std::string, std::string> config_info;
+  benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
+                                                &config_info);
  auto model_file = FLAGS_model + sep + "model.pdmodel";
  auto params_file = FLAGS_model + sep + "model.pdiparams";
  auto config_file = FLAGS_model + sep + "infer_cfg.yml";
-  auto model_ppyolov8 = fastdeploy::vision::detection::PaddleYOLOv8(
-      model_file, params_file, config_file, option);
-  fastdeploy::vision::DetectionResult res;
+  auto model_ppyolov8 = vision::detection::PaddleYOLOv8(model_file, params_file,
+                                                        config_file, option);
+  vision::DetectionResult res;
+  if (config_info["precision_compare"] == "true") {
+    // Run once at least
+    model_ppyolov8.Predict(im, &res);
+    // 1. Test result diff
+    std::cout << "=============== Test result diff =================\n";
+    // Save result to -> disk.
+    std::string det_result_path = "ppyolov8_result.txt";
+    benchmark::ResultManager::SaveDetectionResult(res, det_result_path);
+    // Load result from <- disk.
+    vision::DetectionResult res_loaded;
+    benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path);
+    // Calculate diff between two results.
+    auto det_diff =
+        benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
+    std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
+              << ", max=" << det_diff.boxes.max
+              << ", min=" << det_diff.boxes.min << std::endl;
+    std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
+              << ", max=" << det_diff.labels.max
+              << ", min=" << det_diff.labels.min << std::endl;
+    // 2. Test tensor diff
+    std::cout << "=============== Test tensor diff =================\n";
+    std::vector<vision::DetectionResult> batch_res;
+    std::vector<fastdeploy::FDTensor> input_tensors, output_tensors;
+    std::vector<cv::Mat> imgs;
+    imgs.push_back(im);
+    std::vector<vision::FDMat> fd_images = vision::WrapMat(imgs);
+
+    model_ppyolov8.GetPreprocessor().Run(&fd_images, &input_tensors);
+    input_tensors[0].name = "image";
+    input_tensors[1].name = "scale_factor";
+    input_tensors[2].name = "im_shape";
+    input_tensors.pop_back();
+    model_ppyolov8.Infer(input_tensors, &output_tensors);
+    model_ppyolov8.GetPostprocessor().Run(output_tensors, &batch_res);
+    // Save tensor to -> disk.
+    auto& tensor_dump = output_tensors[0];
+    std::string det_tensor_path = "ppyolov8_tensor.txt";
+    benchmark::ResultManager::SaveFDTensor(tensor_dump, det_tensor_path);
+    // Load tensor from <- disk.
+    fastdeploy::FDTensor tensor_loaded;
+    benchmark::ResultManager::LoadFDTensor(&tensor_loaded, det_tensor_path);
+    // Calculate diff between two tensors.
+    auto det_tensor_diff = benchmark::ResultManager::CalculateDiffStatis(
+        tensor_dump, tensor_loaded);
+    std::cout << "Tensor diff: mean=" << det_tensor_diff.data.mean
+              << ", max=" << det_tensor_diff.data.max
+              << ", min=" << det_tensor_diff.data.min << std::endl;
+  }
+  // Run profiling
+  if (FLAGS_no_nms) {
+    model_ppyolov8.GetPostprocessor().ApplyDecodeAndNMS();
+  }
  BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res))
-  auto vis_im = fastdeploy::vision::VisDetection(im, res);
+  auto vis_im = vision::VisDetection(im, res);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 #endif
+
  return 0;
 }
--- a/benchmark/cpp/benchmark_precision_ppyolov8.cc
+++ b/benchmark/cpp/benchmark_precision_ppyolov8.cc
@@ -1,91 +0,0 @@
-// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "flags.h"
-#include "macros.h"
-#include "option.h"
-
-namespace vision = fastdeploy::vision;
-namespace benchmark = fastdeploy::benchmark;
-
-int main(int argc, char* argv[]) {
-#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
-  // Initialization
-  auto option = fastdeploy::RuntimeOption();
-  if (!CreateRuntimeOption(&option, argc, argv, true)) {
-    return -1;
-  }
-  auto im = cv::imread(FLAGS_image);
-  auto model_file = FLAGS_model + sep + "model.pdmodel";
-  auto params_file = FLAGS_model + sep + "model.pdiparams";
-  auto config_file = FLAGS_model + sep + "infer_cfg.yml";
-  auto model_ppyolov8 = vision::detection::PaddleYOLOv8(model_file, params_file,
-                                                        config_file, option);
-  vision::DetectionResult res;
-  // Run once at least
-  model_ppyolov8.Predict(im, &res);
-  // 1. Test result diff
-  std::cout << "=============== Test result diff =================\n";
-  // Save result to -> disk.
-  std::string det_result_path = "ppyolov8_result.txt";
-  benchmark::ResultManager::SaveDetectionResult(res, det_result_path);
-  // Load result from <- disk.
-  vision::DetectionResult res_loaded;
-  benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path);
-  // Calculate diff between two results.
-  auto det_diff =
-      benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
-  std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
-            << ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min
-            << std::endl;
-  std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
-            << ", max=" << det_diff.labels.max
-            << ", min=" << det_diff.labels.min << std::endl;
-  // 2. Test tensor diff
-  std::cout << "=============== Test tensor diff =================\n";
-  std::vector<vision::DetectionResult> batch_res;
-  std::vector<fastdeploy::FDTensor> input_tensors, output_tensors;
-  std::vector<cv::Mat> imgs;
-  imgs.push_back(im);
-  std::vector<vision::FDMat> fd_images = vision::WrapMat(imgs);
-
-  model_ppyolov8.GetPreprocessor().Run(&fd_images, &input_tensors);
-  input_tensors[0].name = "image";
-  input_tensors[1].name = "scale_factor";
-  input_tensors[2].name = "im_shape";
-  input_tensors.pop_back();
-  model_ppyolov8.Infer(input_tensors, &output_tensors);
-  model_ppyolov8.GetPostprocessor().Run(output_tensors, &batch_res);
-  // Save tensor to -> disk.
-  auto& tensor_dump = output_tensors[0];
-  std::string det_tensor_path = "ppyolov8_tensor.txt";
-  benchmark::ResultManager::SaveFDTensor(tensor_dump, det_tensor_path);
-  // Load tensor from <- disk.
-  fastdeploy::FDTensor tensor_loaded;
-  benchmark::ResultManager::LoadFDTensor(&tensor_loaded, det_tensor_path);
-  // Calculate diff between two tensors.
-  auto det_tensor_diff =
-      benchmark::ResultManager::CalculateDiffStatis(tensor_dump, tensor_loaded);
-  std::cout << "Tensor diff: mean=" << det_tensor_diff.data.mean
-            << ", max=" << det_tensor_diff.data.max
-            << ", min=" << det_tensor_diff.data.min << std::endl;
-  // 3. Run profiling
-  BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res))
-  auto vis_im = vision::VisDetection(im, res);
-  cv::imwrite("vis_result.jpg", vis_im);
-  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
-#endif
-
-  return 0;
-}
--- a/benchmark/cpp/benchmark_yolov5.cc
+++ b/benchmark/cpp/benchmark_yolov5.cc
@@ -27,8 +27,12 @@ int main(int argc, char* argv[]) {
    return -1;
  }
  auto im = cv::imread(FLAGS_image);
+  std::unordered_map<std::string, std::string> config_info;
+  benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
+                                                &config_info);
  auto model_yolov5 = vision::detection::YOLOv5(FLAGS_model, "", option);
  vision::DetectionResult res;
+  if (config_info["precision_compare"] == "true") {
    // Run once at least
    model_yolov5.Predict(im, &res);
    // 1. Test result diff
@@ -43,11 +47,12 @@ int main(int argc, char* argv[]) {
    auto det_diff =
        benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
    std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
-            << ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min
-            << std::endl;
+              << ", max=" << det_diff.boxes.max
+              << ", min=" << det_diff.boxes.min << std::endl;
    std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
              << ", max=" << det_diff.labels.max
              << ", min=" << det_diff.labels.min << std::endl;
+  }
  BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res))
  auto vis_im = vision::VisDetection(im, res);
  cv::imwrite("vis_result.jpg", vis_im);
--- a/benchmark/cpp/config.txt
+++ b/benchmark/cpp/config.txt
@@ -0,0 +1,13 @@
+device: cpu
+device_id: 0
+cpu_thread_nums: 1
+warmup: 200
+repeat: 1000
+backend: default
+profile_mode: runtime
+include_h2d_d2h: true
+use_fp16: false
+collect_memory_info: false
+sampling_interval: 1
+precision_compare: false
+result_path: benchmark_cpu.txt
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -14,6 +14,7 @@

 #pragma once

+#include <unordered_map>
 #include "gflags/gflags.h"
 #include "fastdeploy/benchmark/utils.h"

@@ -25,39 +26,19 @@ static const char sep = '/';

 DEFINE_string(model, "", "Directory of the inference model.");
 DEFINE_string(image, "", "Path of the image file.");
-DEFINE_string(device, "cpu",
-              "Type of inference device, support 'cpu/gpu/xpu'.");
-DEFINE_int32(device_id, 0, "device(gpu/xpu/...) id.");
-DEFINE_int32(warmup, 200, "Number of warmup for profiling.");
-DEFINE_int32(repeat, 1000, "Number of repeats for profiling.");
-DEFINE_string(profile_mode, "runtime", "runtime or end2end.");
-DEFINE_string(backend, "default",
-              "The inference runtime backend, support: ['default', 'ort', "
-              "'paddle', 'ov', 'trt', 'paddle_trt', 'lite']");
-DEFINE_int32(cpu_thread_nums, 8, "Set numbers of cpu thread.");
-DEFINE_bool(
-    include_h2d_d2h, false, "Whether run profiling with h2d and d2h.");
-DEFINE_bool(
-    use_fp16, false,
-    "Whether to use FP16 mode, only support 'trt', 'paddle_trt' "
-    "and 'lite' backend");
-DEFINE_bool(
-    collect_memory_info, false, "Whether to collect memory info");
-DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");
-DEFINE_string(result_path, "benchmark.txt", "Path of benchmark result file.");
+DEFINE_string(config_path, "config.txt", "Path of benchmark config.");

 static void PrintUsage() {
-  std::cout << "Usage: infer_demo --model model_path --image img_path --device "
-               "[cpu|gpu|xpu] --backend "
-               "[default|ort|paddle|ov|trt|paddle_trt|lite] "
-               "--use_fp16 false"
+  std::cout << "Usage: infer_demo --model model_path --image img_path "
+               "--config_path config.txt[Path of benchmark config.] "
            << std::endl;
  std::cout << "Default value of device: cpu" << std::endl;
  std::cout << "Default value of backend: default" << std::endl;
  std::cout << "Default value of use_fp16: false" << std::endl;
 }

-static void PrintBenchmarkInfo() {
+static void PrintBenchmarkInfo(std::unordered_map<std::string,
+                               std::string> config_info) {
 #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
  // Get model name
  std::vector<std::string> model_names;
@@ -71,31 +52,32 @@ static void PrintBenchmarkInfo() {
  ss.precision(3);
  ss << "\n======= Model Info =======\n";
  ss << "model_name: " << model_names[model_names.size() - 1] << std::endl;
-  ss << "profile_mode: " << FLAGS_profile_mode << std::endl;
-  if (FLAGS_profile_mode == "runtime") {
-    ss << "include_h2d_d2h: " << FLAGS_include_h2d_d2h << std::endl;
+  ss << "profile_mode: " << config_info["profile_mode"] << std::endl;
+  if (config_info["profile_mode"] == "runtime") {
+    ss << "include_h2d_d2h: " << config_info["include_h2d_d2h"] << std::endl;
  }
  ss << "\n======= Backend Info =======\n";
-  ss << "warmup: " << FLAGS_warmup << std::endl;
-  ss << "repeats: " << FLAGS_repeat << std::endl;
-  ss << "device: " << FLAGS_device << std::endl;
-  if (FLAGS_device == "gpu") {
-    ss << "device_id: " << FLAGS_device_id << std::endl;
+  ss << "warmup: " << config_info["warmup"] << std::endl;
+  ss << "repeats: " << config_info["repeat"] << std::endl;
+  ss << "device: " << config_info["device"] << std::endl;
+  if (config_info["device"] == "gpu") {
+    ss << "device_id: " << config_info["device_id"] << std::endl;
+    ss << "use_fp16: " << config_info["use_fp16"] << std::endl;
  }
-  ss << "backend: " << FLAGS_backend << std::endl;
-  if (FLAGS_device == "cpu") {
-    ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl;
+  ss << "backend: " << config_info["backend"] << std::endl;
+  if (config_info["device"] == "cpu") {
+    ss << "cpu_thread_nums: " << config_info["cpu_thread_nums"] << std::endl;
  }
-  ss << "use_fp16: " << FLAGS_use_fp16 << std::endl;
-  ss << "collect_memory_info: " << FLAGS_collect_memory_info << std::endl;
-  if (FLAGS_collect_memory_info) {
-    ss << "sampling_interval: " << std::to_string(FLAGS_sampling_interval)
+  ss << "collect_memory_info: "
+     << config_info["collect_memory_info"] << std::endl;
+  if (config_info["collect_memory_info"] == "true") {
+    ss << "sampling_interval: " << config_info["sampling_interval"]
       << "ms" << std::endl;
  }
  std::cout << ss.str() << std::endl;
  // Save benchmark info
  fastdeploy::benchmark::ResultManager::SaveBenchmarkResult(ss.str(),
-                                                  FLAGS_result_path);
+                                        config_info["result_path"]);
 #endif
  return;
 }
--- a/benchmark/cpp/macros.h
+++ b/benchmark/cpp/macros.h
@@ -22,15 +22,18 @@
    std::cerr << "Failed to initialize." << std::endl;                      \
    return 0;                                                               \
  }                                                                         \
-  auto __im__ = cv::imread(FLAGS_image);                                    \
+  std::unordered_map<std::string, std::string> __config_info__;             \
+  fastdeploy::benchmark::ResultManager::LoadBenchmarkConfig(                \
+                             FLAGS_config_path, &__config_info__);          \
  std::stringstream __ss__;                                                 \
  __ss__.precision(6);                                                      \
  fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__(         \
-      FLAGS_sampling_interval, FLAGS_device_id);                            \
-  if (FLAGS_collect_memory_info) {                                          \
+                     std::stoi(__config_info__["sampling_interval"]),       \
+                     std::stoi(__config_info__["device_id"]));              \
+  if (__config_info__["collect_memory_info"] == "true") {                   \
    __resource_moniter__.Start();                                           \
  }                                                                         \
-  if (FLAGS_profile_mode == "runtime") {                                    \
+  if (__config_info__["profile_mode"] == "runtime") {                       \
    if (!BENCHMARK_FUNC) {                                                  \
      std::cerr << "Failed to predict." << std::endl;                       \
      return 0;                                                             \
@@ -39,29 +42,35 @@
    std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
    __ss__ << "Runtime(ms): " << __profile_time__ << "ms." << std::endl;    \
  } else {                                                                  \
-    std::cout << "Warmup " << FLAGS_warmup << " times..." << std::endl;     \
-    for (int __i__ = 0; __i__ < FLAGS_warmup; __i__++) {                    \
+    std::cout << "Warmup "                                                  \
+              << __config_info__["warmup"]                                  \
+              << " times..." << std::endl;                                  \
+    int __warmup__ = std::stoi(__config_info__["warmup"]);                  \
+    for (int __i__ = 0; __i__ < __warmup__; __i__++) {                      \
      if (!BENCHMARK_FUNC) {                                                \
        std::cerr << "Failed to predict." << std::endl;                     \
        return 0;                                                           \
      }                                                                     \
    }                                                                       \
    std::cout << "Counting time..." << std::endl;                           \
-    std::cout << "Repeat " << FLAGS_repeat << " times..." << std::endl;     \
+    std::cout << "Repeat "                                                  \
+              << __config_info__["repeat"]                                  \
+              << " times..." << std::endl;                                  \
    fastdeploy::TimeCounter __tc__;                                         \
    __tc__.Start();                                                         \
-    for (int __i__ = 0; __i__ < FLAGS_repeat; __i__++) {                    \
+    int __repeat__ = std::stoi(__config_info__["repeat"]);                  \
+    for (int __i__ = 0; __i__ < __repeat__; __i__++) {                      \
      if (!BENCHMARK_FUNC) {                                                \
        std::cerr << "Failed to predict." << std::endl;                     \
        return 0;                                                           \
      }                                                                     \
    }                                                                       \
    __tc__.End();                                                           \
-    double __end2end__ = __tc__.Duration() / FLAGS_repeat * 1000;           \
+    double __end2end__ = __tc__.Duration() / __repeat__ * 1000;             \
    std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl;      \
    __ss__ << "End2End(ms): " << __end2end__ << "ms." << std::endl;         \
  }                                                                         \
-  if (FLAGS_collect_memory_info) {                                          \
+  if (__config_info__["collect_memory_info"] == "true") {                   \
    float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem();                \
    float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem();                \
    float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil();              \
@@ -74,5 +83,5 @@
    __resource_moniter__.Stop();                                            \
  }                                                                         \
  fastdeploy::benchmark::ResultManager::SaveBenchmarkResult(__ss__.str(),   \
-                                          FLAGS_result_path);               \
+                                         __config_info__["result_path"]);   \
 }
--- a/benchmark/cpp/option.h
+++ b/benchmark/cpp/option.h
@@ -19,81 +19,89 @@
 static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
                        int argc, char* argv[], bool remove_flags) {
  google::ParseCommandLineFlags(&argc, &argv, remove_flags);
-  if (FLAGS_profile_mode == "runtime") {
-    option->EnableProfiling(FLAGS_include_h2d_d2h, FLAGS_repeat, FLAGS_warmup);
+  option->DisableValidBackendCheck();
+  std::unordered_map<std::string, std::string> config_info;  // option name -> raw string value from the config file
+  if (!fastdeploy::benchmark::ResultManager::LoadBenchmarkConfig(
+          FLAGS_config_path, &config_info)) { PrintUsage(); return false; }
+  if (config_info["profile_mode"] == "runtime") {
+    option->EnableProfiling(config_info["include_h2d_d2h"] == "true",
+                            std::stoi(config_info["repeat"]),  // std::stoi throws if the key is absent (empty string)
+                            std::stoi(config_info["warmup"]));
  }
-  if (FLAGS_device == "gpu") {
-    option->UseGpu(FLAGS_device_id);
-    if (FLAGS_backend == "ort") {
+  if (config_info["device"] == "gpu") {
+    option->UseGpu(std::stoi(config_info["device_id"]));
+    if (config_info["backend"] == "ort") {
      option->UseOrtBackend();
-    } else if (FLAGS_backend == "paddle") {
+    } else if (config_info["backend"] == "paddle") {
      option->UsePaddleInferBackend();
-    } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
+    } else if (config_info["backend"] == "trt" ||
+               config_info["backend"] == "paddle_trt") {
      option->UseTrtBackend();
-      if (FLAGS_backend == "paddle_trt") {
+      if (config_info["backend"] == "paddle_trt") {
        option->UsePaddleInferBackend();
        option->paddle_infer_option.enable_trt = true;
      }
-      if (FLAGS_use_fp16) {
+      if (config_info["use_fp16"] == "true") {  // only the exact string "true" enables FP16
        option->trt_option.enable_fp16 = true;
      }
-    } else if (FLAGS_backend == "default") {
+    } else if (config_info["backend"] == "default") {
      return true;
    } else {
      std::cout << "While inference with GPU, only support "
                   "default/ort/paddle/trt/paddle_trt now, "
-                << FLAGS_backend << " is not supported." << std::endl;
+                << config_info["backend"] << " is not supported." << std::endl;
      PrintUsage();
      return false;
    }
-  } else if (FLAGS_device == "cpu") {
-    option->SetCpuThreadNum(FLAGS_cpu_thread_nums);
-    if (FLAGS_backend == "ort") {
+  } else if (config_info["device"] == "cpu") {
+    option->SetCpuThreadNum(std::stoi(config_info["cpu_thread_nums"]));
+    if (config_info["backend"] == "ort") {
      option->UseOrtBackend();
-    } else if (FLAGS_backend == "ov") {
+    } else if (config_info["backend"] == "ov") {
      option->UseOpenVINOBackend();
-    } else if (FLAGS_backend == "paddle") {
+    } else if (config_info["backend"] == "paddle") {
      option->UsePaddleInferBackend();
-    } else if (FLAGS_backend == "lite") {
+    } else if (config_info["backend"] == "lite") {
      option->UsePaddleLiteBackend();
-      if (FLAGS_use_fp16) {
+      if (config_info["use_fp16"] == "true") {
        option->paddle_lite_option.enable_fp16 = true;
      }
-    } else if (FLAGS_backend == "default") {
+    } else if (config_info["backend"] == "default") {
      return true;
    } else {
      std::cout << "While inference with CPU, only support "
                   "default/ort/ov/paddle/lite now, "
-                << FLAGS_backend << " is not supported." << std::endl;
+                << config_info["backend"] << " is not supported." << std::endl;
      PrintUsage();
      return false;
    }
-  } else if (FLAGS_device == "xpu") {
-    option->UseKunlunXin(FLAGS_device_id);
-    if (FLAGS_backend == "ort") {
+  } else if (config_info["device"] == "xpu") {
+    option->UseKunlunXin(std::stoi(config_info["device_id"]));
+    if (config_info["backend"] == "ort") {
      option->UseOrtBackend();
-    } else if (FLAGS_backend == "paddle") {
+    } else if (config_info["backend"] == "paddle") {
      option->UsePaddleInferBackend();
-    } else if (FLAGS_backend == "lite") {
+    } else if (config_info["backend"] == "lite") {
      option->UsePaddleLiteBackend();
-      if (FLAGS_use_fp16) {
+      if (config_info["use_fp16"] == "true") {
        option->paddle_lite_option.enable_fp16 = true;
      }
-    } else if (FLAGS_backend == "default") {
+    } else if (config_info["backend"] == "default") {
      return true;
    } else {
      std::cout << "While inference with XPU, only support "
                   "default/ort/paddle/lite now, "
-                << FLAGS_backend << " is not supported." << std::endl;
+                << config_info["backend"] << " is not supported." << std::endl;
      PrintUsage();
      return false;
    }
  } else {
-    std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device
+    std::cerr << "Only support device CPU/GPU/XPU now, "
+              << config_info["device"]
              << " is not supported." << std::endl;
    PrintUsage();
    return false;
  }
-  PrintBenchmarkInfo();
+  PrintBenchmarkInfo(config_info);
  return true;
 }
--- a/fastdeploy/benchmark/utils.cc
+++ b/fastdeploy/benchmark/utils.cc
@@ -249,7 +249,7 @@ bool ResultManager::SaveFDTensor(const FDTensor& tensor,

 bool ResultManager::LoadFDTensor(FDTensor* tensor, const std::string& path) {
  if (!CheckFileExists(path)) {
-    FDERROR << "Can't found file from" << path << std::endl;
+    FDERROR << "Can't found file from " << path << std::endl;
    return false;
  }
  auto lines = ReadLines(path);
@@ -365,6 +365,45 @@ void ResultManager::SaveBenchmarkResult(const std::string& res,
  fs.close();
 }

+bool ResultManager::LoadBenchmarkConfig(  // reads "key: value" lines of the file at path into *config_info
+    const std::string& path,
+    std::unordered_map<std::string, std::string>* config_info) {
+  if (!CheckFileExists(path)) {
+    FDERROR << "Can't found file from " << path << std::endl;
+    return false;  // missing file: *config_info is left untouched
+  }
+  for (const auto& line : ReadLines(path)) {
+    std::vector<std::string> tokens;
+    Split(line, tokens, ':');
+    if (tokens.size() < 2) continue;  // blank or ':'-less line: skip instead of indexing past the end
+    (*config_info)[tokens[0]] = Strip(tokens[1], ' ');  // key is stored as-is; spaces are stripped from the value
+  }
+  return true;
+}
+
+std::vector<std::vector<int32_t>> ResultManager::GetInputShapes(
+    const std::string& raw_shapes) {  // raw_shapes: shapes separated by ':', dims within a shape by ','
+  std::vector<std::vector<int32_t>> shapes;
+  std::vector<std::string> shape_tokens;
+  Split(raw_shapes, shape_tokens, ':');  // presumably yields one token per shape
+  for (auto str_shape : shape_tokens) {
+    std::vector<int32_t> shape;
+    std::string tmp_str = str_shape;
+    while (!tmp_str.empty()) {
+      int dim = atoi(tmp_str.data());  // atoi reads the leading number and stops at the first non-digit
+      shape.push_back(dim);
+      size_t next_offset = tmp_str.find(",");
+      if (next_offset == std::string::npos) {
+        break;  // no further ',' - that was the last dim of this shape
+      } else {
+        tmp_str = tmp_str.substr(next_offset + 1);  // drop the parsed dim and its ','
+      }
+    }
+    shapes.push_back(shape);
+  }
+  return shapes;
+}
+
 #if defined(ENABLE_VISION)
 bool ResultManager::SaveDetectionResult(const vision::DetectionResult& res,
                                        const std::string& path) {
@@ -520,7 +559,7 @@ bool ResultManager::SaveOCRDetResult(const std::vector<std::array<int, 8>>& res,
 bool ResultManager::LoadDetectionResult(vision::DetectionResult* res,
                                        const std::string& path) {
  if (!CheckFileExists(path)) {
-    FDERROR << "Can't found file from" << path << std::endl;
+    FDERROR << "Can't found file from " << path << std::endl;
    return false;
  }
  auto lines = ReadLines(path);
@@ -553,7 +592,7 @@ bool ResultManager::LoadDetectionResult(vision::DetectionResult* res,
 bool ResultManager::LoadClassifyResult(vision::ClassifyResult* res,
                                       const std::string& path) {
  if (!CheckFileExists(path)) {
-    FDERROR << "Can't found file from" << path << std::endl;
+    FDERROR << "Can't found file from " << path << std::endl;
    return false;
  }
  auto lines = ReadLines(path);
@@ -575,7 +614,7 @@ bool ResultManager::LoadClassifyResult(vision::ClassifyResult* res,
 bool ResultManager::LoadSegmentationResult(vision::SegmentationResult* res,
                                           const std::string& path) {
  if (!CheckFileExists(path)) {
-    FDERROR << "Can't found file from" << path << std::endl;
+    FDERROR << "Can't found file from " << path << std::endl;
    return false;
  }
  auto lines = ReadLines(path);
@@ -602,7 +641,7 @@ bool ResultManager::LoadSegmentationResult(vision::SegmentationResult* res,
 bool ResultManager::LoadOCRDetResult(std::vector<std::array<int, 8>>* res,
                                     const std::string& path) {
  if (!CheckFileExists(path)) {
-    FDERROR << "Can't found file from" << path << std::endl;
+    FDERROR << "Can't found file from " << path << std::endl;
    return false;
  }
  auto lines = ReadLines(path);
--- a/fastdeploy/benchmark/utils.h
+++ b/fastdeploy/benchmark/utils.h
@@ -15,6 +15,7 @@

 #include <memory>
 #include <thread>  // NOLINT
+#include <unordered_map>
 #include "fastdeploy/utils/utils.h"
 #include "fastdeploy/core/fd_tensor.h"
 #if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
@@ -141,6 +142,12 @@ struct FASTDEPLOY_DECL ResultManager {
  /// Save Benchmark data
  static void SaveBenchmarkResult(const std::string& res,
                                  const std::string& path);
+  /// Load "key: value" lines of the file at path into config_info (false if no file)
+  static bool LoadBenchmarkConfig(const std::string& path,
+             std::unordered_map<std::string, std::string>* config_info);
+  /// Parse input shapes from raw_shapes: ':' between shapes, ',' between dims
+  static std::vector<std::vector<int32_t>> GetInputShapes(
+                                      const std::string& raw_shapes);
 #if defined(ENABLE_VISION)
  /// Save & Load functions for basic results.
  static bool SaveDetectionResult(const vision::DetectionResult& res,