[Benchmark] Add macros for benchmark (#1301)

* add GPL license * add GPL-3.0 license * add GPL-3.0 license * add GPL-3.0 license * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
2025-10-05 08:37:06 +08:00 · 2023-02-13 16:12:54 +08:00
parent e63f5f369e
commit 47b1d27fbb
5 changed files with 190 additions and 252 deletions
--- a/benchmark/cpp/benchmark_ppyolov8.cc
+++ b/benchmark/cpp/benchmark_ppyolov8.cc
@@ -12,9 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "fastdeploy/benchmark/utils.h"
-#include "fastdeploy/vision.h"
 #include "flags.h"
+#include "macros.h"
+#include "option.h"

 #ifdef WIN32
 const char sep = '\\';
@@ -22,104 +22,24 @@ const char sep = '\\';
 const char sep = '/';
 #endif

-bool RunModel(std::string model_dir, std::string image_file, size_t warmup,
-              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
-              std::string gpu_mem_file_name) {
+// Benchmarks PaddleYOLOv8 on FLAGS_image and saves the visualized detections.
+int main(int argc, char* argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  auto im = cv::imread(FLAGS_image);
-  // Initialization
+  // Build the runtime option from the flags; exit non-zero when they are bad.
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option)) {
    PrintUsage();
-    return false;
+    return 1;  // `return false` in main would report success (exit code 0).
  }
-  auto model_file = model_dir + sep + "model.pdmodel";
-  auto params_file = model_dir + sep + "model.pdiparams";
-  auto config_file = model_dir + sep + "infer_cfg.yml";
-
-  if (FLAGS_profile_mode == "runtime") {
-    option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
-  }
-  auto model = fastdeploy::vision::detection::PaddleYOLOv8(
+  auto model_file = FLAGS_model + sep + "model.pdmodel";
+  auto params_file = FLAGS_model + sep + "model.pdiparams";
+  auto config_file = FLAGS_model + sep + "infer_cfg.yml";
+  auto model_ppyolov8 = fastdeploy::vision::detection::PaddleYOLOv8(
      model_file, params_file, config_file, option);
-  if (!model.Initialized()) {
-    std::cerr << "Failed to initialize." << std::endl;
-    return false;
-  }
-  auto im = cv::imread(image_file);
-  // For Runtime
-  if (FLAGS_profile_mode == "runtime") {
-    fastdeploy::vision::DetectionResult res;
-    if (!model.Predict(im, &res)) {
-      std::cerr << "Failed to predict." << std::endl;
-      return false;
-    }
-    double profile_time = model.GetProfileTime() * 1000;
-    std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
-    auto vis_im = fastdeploy::vision::VisDetection(im, res);
-    cv::imwrite("vis_result.jpg", vis_im);
-    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
-  } else {
-    // For End2End
-    // Step1: warm up for warmup times
-    std::cout << "Warmup " << warmup << " times..." << std::endl;
-    for (int i = 0; i < warmup; i++) {
-      fastdeploy::vision::DetectionResult res;
-      if (!model.Predict(im, &res)) {
-        std::cerr << "Failed to predict." << std::endl;
-        return false;
-      }
-    }
-    std::vector<float> end2end_statis;
-    // Step2: repeat for repeats times
-    std::cout << "Counting time..." << std::endl;
-    fastdeploy::TimeCounter tc;
-    fastdeploy::vision::DetectionResult res;
-    for (int i = 0; i < repeats; i++) {
-      if (FLAGS_collect_memory_info && i % dump_period == 0) {
-        fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
-#if defined(WITH_GPU)
-        fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
-                                                         FLAGS_device_id);
-#endif
-      }
-      tc.Start();
-      if (!model.Predict(im, &res)) {
-        std::cerr << "Failed to predict." << std::endl;
-        return false;
-      }
-      tc.End();
-      end2end_statis.push_back(tc.Duration() * 1000);
-    }
-    float end2end = std::accumulate(end2end_statis.end() - repeats,
-                                    end2end_statis.end(), 0.f) /
-                    repeats;
-    std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
-    auto vis_im = fastdeploy::vision::VisDetection(im, res);
-    cv::imwrite("vis_result.jpg", vis_im);
-    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
-  }
-
-  return true;
-}
-
-int main(int argc, char* argv[]) {
-  google::ParseCommandLineFlags(&argc, &argv, true);
-  int repeats = FLAGS_repeat;
-  int warmup = FLAGS_warmup;
-  int dump_period = FLAGS_dump_period;
-  std::string cpu_mem_file_name = "result_cpu.txt";
-  std::string gpu_mem_file_name = "result_gpu.txt";
-  // Run model
-  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
-               cpu_mem_file_name, gpu_mem_file_name) != true) {
-    exit(1);
-  }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-#if defined(WITH_GPU)
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
-#endif
-  }
+  fastdeploy::vision::DetectionResult res;
+  BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res))
+  cv::imwrite("vis_result.jpg", fastdeploy::vision::VisDetection(im, res));
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
  return 0;
 }
--- a/benchmark/cpp/benchmark_yolov5.cc
+++ b/benchmark/cpp/benchmark_yolov5.cc
@@ -12,96 +12,25 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "fastdeploy/benchmark/utils.h"
-#include "fastdeploy/vision.h"
 #include "flags.h"
+#include "macros.h"
+#include "option.h"

-bool RunModel(std::string model_file, std::string image_file, size_t warmup,
-              size_t repeats, size_t sampling_interval) {
+// Benchmarks YOLOv5 on FLAGS_image and saves the visualized detections.
+int main(int argc, char* argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  auto im = cv::imread(FLAGS_image);
-  // Initialization
+  // Build the runtime option from the flags; exit non-zero when they are bad.
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option)) {
    PrintUsage();
-    return false;
+    return 1;  // `return false` in main would report success (exit code 0).
  }
-  if (FLAGS_profile_mode == "runtime") {
-    option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
-  }
-  auto model = fastdeploy::vision::detection::YOLOv5(model_file, "", option);
-  if (!model.Initialized()) {
-    std::cerr << "Failed to initialize." << std::endl;
-    return false;
-  }
-  auto im = cv::imread(image_file);
-  // For collect memory info
-  fastdeploy::benchmark::ResourceUsageMonitor resource_moniter(
-      sampling_interval, FLAGS_device_id);
-  if (FLAGS_collect_memory_info) {
-    resource_moniter.Start();
-  }
-  // For Runtime
-  if (FLAGS_profile_mode == "runtime") {
-    fastdeploy::vision::DetectionResult res;
-    if (!model.Predict(im, &res)) {
-      std::cerr << "Failed to predict." << std::endl;
-      return false;
-    }
-    double profile_time = model.GetProfileTime() * 1000;
-    std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
-    auto vis_im = fastdeploy::vision::VisDetection(im, res);
-    cv::imwrite("vis_result.jpg", vis_im);
-    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
-  } else {
-    // For End2End
-    // Step1: warm up for warmup times
-    std::cout << "Warmup " << warmup << " times..." << std::endl;
-    for (int i = 0; i < warmup; i++) {
-      fastdeploy::vision::DetectionResult res;
-      if (!model.Predict(im, &res)) {
-        std::cerr << "Failed to predict." << std::endl;
-        return false;
-      }
-    }
-    // Step2: repeat for repeats times
-    std::cout << "Counting time..." << std::endl;
-    std::cout << "Repeat " << repeats << " times..." << std::endl;
-    fastdeploy::vision::DetectionResult res;
-    fastdeploy::TimeCounter tc;
-    tc.Start();
-    for (int i = 0; i < repeats; i++) {
-      if (!model.Predict(im, &res)) {
-        std::cerr << "Failed to predict." << std::endl;
-        return false;
-      }
-    }
-    tc.End();
-    double end2end = tc.Duration() / repeats * 1000;
-    std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
-    auto vis_im = fastdeploy::vision::VisDetection(im, res);
-    cv::imwrite("vis_result.jpg", vis_im);
-    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
-  }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = resource_moniter.GetMaxCpuMem();
-    float gpu_mem = resource_moniter.GetMaxGpuMem();
-    float gpu_util = resource_moniter.GetMaxGpuUtil();
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
-    std::cout << "gpu_util: " << gpu_util << std::endl;
-    resource_moniter.Stop();
-  }
-
-  return true;
-}
-
-int main(int argc, char* argv[]) {
-  google::ParseCommandLineFlags(&argc, &argv, true);
-  int repeats = FLAGS_repeat;
-  int warmup = FLAGS_warmup;
-  int sampling_interval = FLAGS_sampling_interval;
-  // Run model
-  if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) {
-    exit(1);
-  }
+  auto model_yolov5 =
+      fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option);
+  fastdeploy::vision::DetectionResult res;
+  BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res))
+  cv::imwrite("vis_result.jpg", fastdeploy::vision::VisDetection(im, res));
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
  return 0;
 }
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -15,7 +15,6 @@
 #pragma once

 #include "gflags/gflags.h"
-#include "fastdeploy/utils/perf.h"

 DEFINE_string(model, "", "Directory of the inference model.");
 DEFINE_string(image, "", "Path of the image file.");
@@ -49,75 +48,3 @@ void PrintUsage() {
  std::cout << "Default value of backend: default" << std::endl;
  std::cout << "Default value of use_fp16: false" << std::endl;
 }
-
-bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
-  if (FLAGS_device == "gpu") {
-    option->UseGpu(FLAGS_device_id);
-    if (FLAGS_backend == "ort") {
-      option->UseOrtBackend();
-    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleInferBackend();
-    } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
-      option->UseTrtBackend();
-      if (FLAGS_backend == "paddle_trt") {
-        option->EnablePaddleToTrt();
-      }
-      if (FLAGS_use_fp16) {
-        option->EnableTrtFP16();
-      }
-    } else if (FLAGS_backend == "default") {
-      return true;
-    } else {
-      std::cout << "While inference with GPU, only support "
-                   "default/ort/paddle/trt/paddle_trt now, "
-                << FLAGS_backend << " is not supported." << std::endl;
-      return false;
-    }
-  } else if (FLAGS_device == "cpu") {
-    option->SetCpuThreadNum(FLAGS_cpu_thread_nums);
-    if (FLAGS_backend == "ort") {
-      option->UseOrtBackend();
-    } else if (FLAGS_backend == "ov") {
-      option->UseOpenVINOBackend();
-    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleInferBackend();
-    } else if (FLAGS_backend == "lite") {
-      option->UsePaddleLiteBackend();
-      if (FLAGS_use_fp16) {
-        option->EnableLiteFP16();
-      }
-    } else if (FLAGS_backend == "default") {
-      return true;
-    } else {
-      std::cout << "While inference with CPU, only support "
-                   "default/ort/ov/paddle/lite now, "
-                << FLAGS_backend << " is not supported." << std::endl;
-      return false;
-    }
-  } else if (FLAGS_device == "xpu") {
-    option->UseKunlunXin(FLAGS_device_id);
-    if (FLAGS_backend == "ort") {
-      option->UseOrtBackend();
-    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleInferBackend();
-    } else if (FLAGS_backend == "lite") {
-      option->UsePaddleLiteBackend();
-      if (FLAGS_use_fp16) {
-        option->EnableLiteFP16();
-      }
-    } else if (FLAGS_backend == "default") {
-      return true;
-    } else {
-      std::cout << "While inference with XPU, only support "
-                   "default/ort/paddle/lite now, "
-                << FLAGS_backend << " is not supported." << std::endl;
-      return false;
-    }
-  } else {
-    std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device
-              << " is not supported." << std::endl;
-    return false;
-  }
-
-  return true;
-}
--- a/benchmark/cpp/macros.h
+++ b/benchmark/cpp/macros.h
@@ -0,0 +1,70 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "fastdeploy/benchmark/utils.h"
+#include "fastdeploy/utils/perf.h"
+
+// Benchmark MODEL_NAME via BENCHMARK_FUNC; a failure returns 1 from the caller.
+#define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC)                         \
+{                                                                           \
+  std::cout << "====" << #MODEL_NAME << "====" << std::endl;                \
+  if (!MODEL_NAME.Initialized()) {                                          \
+    std::cerr << "Failed to initialize." << std::endl;                      \
+    return 1;                                                               \
+  }                                                                         \
+  fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__(         \
+      FLAGS_sampling_interval, FLAGS_device_id);                            \
+  if (FLAGS_collect_memory_info) {                                          \
+    __resource_moniter__.Start();                                           \
+  }                                                                         \
+  if (FLAGS_profile_mode == "runtime") {                                    \
+    if (!BENCHMARK_FUNC) {                                                  \
+      std::cerr << "Failed to predict." << std::endl;                       \
+      return 1;                                                             \
+    }                                                                       \
+    double __profile_time__ = MODEL_NAME.GetProfileTime() * 1000;           \
+    std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
+  } else {                                                                  \
+    std::cout << "Warmup " << FLAGS_warmup << " times..." << std::endl;     \
+    for (int __i__ = 0; __i__ < FLAGS_warmup; __i__++) {                    \
+      if (!BENCHMARK_FUNC) {                                                \
+        std::cerr << "Failed to predict." << std::endl;                     \
+        return 1;                                                           \
+      }                                                                     \
+    }                                                                       \
+    std::cout << "Counting time..." << std::endl;                           \
+    std::cout << "Repeat " << FLAGS_repeat << " times..." << std::endl;     \
+    fastdeploy::TimeCounter __tc__;                                         \
+    __tc__.Start();                                                         \
+    for (int __i__ = 0; __i__ < FLAGS_repeat; __i__++) {                    \
+      if (!BENCHMARK_FUNC) {                                                \
+        std::cerr << "Failed to predict." << std::endl;                     \
+        return 1;                                                           \
+      }                                                                     \
+    }                                                                       \
+    __tc__.End();                                                           \
+    double __end2end__ = __tc__.Duration() / FLAGS_repeat * 1000;           \
+    std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl;      \
+  }                                                                         \
+  if (FLAGS_collect_memory_info) {                                          \
+    float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem();                \
+    float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem();                \
+    float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil();              \
+    std::cout << "cpu_pss_mb: " << __cpu_mem__ << "MB." << std::endl;       \
+    std::cout << "gpu_pss_mb: " << __gpu_mem__ << "MB." << std::endl;       \
+    std::cout << "gpu_util: " << __gpu_util__ << std::endl;                 \
+    __resource_moniter__.Stop();                                            \
+  }                                                                         \
+}
--- a/benchmark/cpp/option.h
+++ b/benchmark/cpp/option.h
@@ -0,0 +1,92 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "fastdeploy/vision.h"
+
+// Fills *option from the benchmark command-line flags.
+// Order: runtime profiling (if FLAGS_profile_mode is "runtime"), device,
+// backend. Returns false after printing the reason when the device or backend
+// is not supported. Backend "default" selects no backend explicitly.
+static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
+  const auto& device = FLAGS_device;
+  const auto& backend = FLAGS_backend;
+  if (FLAGS_profile_mode == "runtime") {
+    option->EnableProfiling(FLAGS_include_h2d_d2h, FLAGS_repeat, FLAGS_warmup);
+  }
+  if (device == "gpu") {
+    option->UseGpu(FLAGS_device_id);
+    if (backend == "ort") {
+      option->UseOrtBackend();
+    } else if (backend == "paddle") {
+      option->UsePaddleInferBackend();
+    } else if (backend == "trt" || backend == "paddle_trt") {
+      option->UseTrtBackend();
+      if (backend == "paddle_trt") {
+        option->EnablePaddleToTrt();
+      }
+      if (FLAGS_use_fp16) {
+        option->EnableTrtFP16();
+      }
+    } else if (backend != "default") {
+      std::cout << "While inference with GPU, only support "
+                   "default/ort/paddle/trt/paddle_trt now, "
+                << backend << " is not supported." << std::endl;
+      return false;
+    }
+  } else if (device == "cpu") {
+    option->SetCpuThreadNum(FLAGS_cpu_thread_nums);
+    if (backend == "ort") {
+      option->UseOrtBackend();
+    } else if (backend == "ov") {
+      option->UseOpenVINOBackend();
+    } else if (backend == "paddle") {
+      option->UsePaddleInferBackend();
+    } else if (backend == "lite") {
+      option->UsePaddleLiteBackend();
+      if (FLAGS_use_fp16) {
+        option->EnableLiteFP16();
+      }
+    } else if (backend != "default") {
+      std::cout << "While inference with CPU, only support "
+                   "default/ort/ov/paddle/lite now, "
+                << backend << " is not supported." << std::endl;
+      return false;
+    }
+  } else if (device == "xpu") {
+    option->UseKunlunXin(FLAGS_device_id);
+    if (backend == "ort") {
+      option->UseOrtBackend();
+    } else if (backend == "paddle") {
+      option->UsePaddleInferBackend();
+    } else if (backend == "lite") {
+      option->UsePaddleLiteBackend();
+      if (FLAGS_use_fp16) {
+        option->EnableLiteFP16();
+      }
+    } else if (backend != "default") {
+      std::cout << "While inference with XPU, only support "
+                   "default/ort/paddle/lite now, "
+                << backend << " is not supported." << std::endl;
+      return false;
+    }
+  } else {
+    std::cerr << "Only support device CPU/GPU/XPU now, " << device
+              << " is not supported." << std::endl;
+    return false;
+  }
+
+  return true;
+}