From 47b1d27fbb11e88dd4ccfe13d11e4561c1d9eb4e Mon Sep 17 00:00:00 2001 From: WJJ1995 Date: Mon, 13 Feb 2023 16:12:54 +0800 Subject: [PATCH] [Benchmark] Add macros for benchmark (#1301) * add GPL lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com> --- benchmark/cpp/benchmark_ppyolov8.cc | 110 ++++------------------------ benchmark/cpp/benchmark_yolov5.cc | 97 ++++-------------------- benchmark/cpp/flags.h | 73 ------------------ benchmark/cpp/macros.h | 70 ++++++++++++++++++ benchmark/cpp/option.h | 92 +++++++++++++++++++++++ 5 files changed, 190 insertions(+), 252 deletions(-) mode change 100755 => 100644 benchmark/cpp/benchmark_yolov5.cc create mode 100755 benchmark/cpp/macros.h create mode 100755 benchmark/cpp/option.h diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc index cff374200..545474635 100644 --- a/benchmark/cpp/benchmark_ppyolov8.cc +++ b/benchmark/cpp/benchmark_ppyolov8.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "fastdeploy/benchmark/utils.h" -#include "fastdeploy/vision.h" #include "flags.h" +#include "macros.h" +#include "option.h" #ifdef WIN32 const char sep = '\\'; @@ -22,104 +22,24 @@ const char sep = '\\'; const char sep = '/'; #endif -bool RunModel(std::string model_dir, std::string image_file, size_t warmup, - size_t repeats, size_t dump_period, std::string cpu_mem_file_name, - std::string gpu_mem_file_name) { +int main(int argc, char* argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + auto im = cv::imread(FLAGS_image); // Initialization auto option = fastdeploy::RuntimeOption(); if (!CreateRuntimeOption(&option)) { PrintUsage(); return false; } - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "infer_cfg.yml"; - - if (FLAGS_profile_mode == "runtime") { - option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup); - } - auto model = fastdeploy::vision::detection::PaddleYOLOv8( + auto model_file = FLAGS_model + sep + "model.pdmodel"; + auto params_file = FLAGS_model + sep + "model.pdiparams"; + auto config_file = FLAGS_model + sep + "infer_cfg.yml"; + auto model_ppyolov8 = fastdeploy::vision::detection::PaddleYOLOv8( model_file, params_file, config_file, option); - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return false; - } - auto im = cv::imread(image_file); - // For Runtime - if (FLAGS_profile_mode == "runtime") { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." 
<< std::endl; - return false; - } - double profile_time = model.GetProfileTime() * 1000; - std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } else { - // For End2End - // Step1: warm up for warmup times - std::cout << "Warmup " << warmup << " times..." << std::endl; - for (int i = 0; i < warmup; i++) { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - } - std::vector end2end_statis; - // Step2: repeat for repeats times - std::cout << "Counting time..." << std::endl; - fastdeploy::TimeCounter tc; - fastdeploy::vision::DetectionResult res; - for (int i = 0; i < repeats; i++) { - if (FLAGS_collect_memory_info && i % dump_period == 0) { - fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name); -#if defined(WITH_GPU) - fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name, - FLAGS_device_id); -#endif - } - tc.Start(); - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - tc.End(); - end2end_statis.push_back(tc.Duration() * 1000); - } - float end2end = std::accumulate(end2end_statis.end() - repeats, - end2end_statis.end(), 0.f) / - repeats; - std::cout << "End2End(ms): " << end2end << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } - - return true; -} - -int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - int repeats = FLAGS_repeat; - int warmup = FLAGS_warmup; - int dump_period = FLAGS_dump_period; - std::string cpu_mem_file_name = "result_cpu.txt"; - std::string gpu_mem_file_name = "result_gpu.txt"; - // Run model - if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period, - cpu_mem_file_name, gpu_mem_file_name) != true) { - exit(1); - } - if (FLAGS_collect_memory_info) { - float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name); - std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl; -#if defined(WITH_GPU) - float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name); - std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl; -#endif - } + fastdeploy::vision::DetectionResult res; + BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res)) + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; return 0; -} +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100755 new mode 100644 index 2e5df6b1c..5b2cab855 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -12,96 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "fastdeploy/benchmark/utils.h" -#include "fastdeploy/vision.h" #include "flags.h" +#include "macros.h" +#include "option.h" -bool RunModel(std::string model_file, std::string image_file, size_t warmup, - size_t repeats, size_t sampling_interval) { +int main(int argc, char* argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + auto im = cv::imread(FLAGS_image); // Initialization auto option = fastdeploy::RuntimeOption(); if (!CreateRuntimeOption(&option)) { PrintUsage(); return false; } - if (FLAGS_profile_mode == "runtime") { - option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup); - } - auto model = fastdeploy::vision::detection::YOLOv5(model_file, "", option); - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return false; - } - auto im = cv::imread(image_file); - // For collect memory info - fastdeploy::benchmark::ResourceUsageMonitor resource_moniter( - sampling_interval, FLAGS_device_id); - if (FLAGS_collect_memory_info) { - resource_moniter.Start(); - } - // For Runtime - if (FLAGS_profile_mode == "runtime") { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - double profile_time = model.GetProfileTime() * 1000; - std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } else { - // For End2End - // Step1: warm up for warmup times - std::cout << "Warmup " << warmup << " times..." << std::endl; - for (int i = 0; i < warmup; i++) { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - } - // Step2: repeat for repeats times - std::cout << "Counting time..." << std::endl; - std::cout << "Repeat " << repeats << " times..." << std::endl; - fastdeploy::vision::DetectionResult res; - fastdeploy::TimeCounter tc; - tc.Start(); - for (int i = 0; i < repeats; i++) { - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - } - tc.End(); - double end2end = tc.Duration() / repeats * 1000; - std::cout << "End2End(ms): " << end2end << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } - if (FLAGS_collect_memory_info) { - float cpu_mem = resource_moniter.GetMaxCpuMem(); - float gpu_mem = resource_moniter.GetMaxGpuMem(); - float gpu_util = resource_moniter.GetMaxGpuUtil(); - std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl; - std::cout << "gpu_pss_mb: " << gpu_mem << "MB." 
<< std::endl; - std::cout << "gpu_util: " << gpu_util << std::endl; - resource_moniter.Stop(); - } - - return true; -} - -int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - int repeats = FLAGS_repeat; - int warmup = FLAGS_warmup; - int sampling_interval = FLAGS_sampling_interval; - // Run model - if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) { - exit(1); - } + auto model_yolov5 = + fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); + fastdeploy::vision::DetectionResult res; + BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res)) + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; return 0; -} \ No newline at end of file +} diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h index 64f22c702..7f8c3a29f 100755 --- a/benchmark/cpp/flags.h +++ b/benchmark/cpp/flags.h @@ -15,7 +15,6 @@ #pragma once #include "gflags/gflags.h" -#include "fastdeploy/utils/perf.h" DEFINE_string(model, "", "Directory of the inference model."); DEFINE_string(image, "", "Path of the image file."); @@ -49,75 +48,3 @@ void PrintUsage() { std::cout << "Default value of backend: default" << std::endl; std::cout << "Default value of use_fp16: false" << std::endl; } - -bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { - if (FLAGS_device == "gpu") { - option->UseGpu(FLAGS_device_id); - if (FLAGS_backend == "ort") { - option->UseOrtBackend(); - } else if (FLAGS_backend == "paddle") { - option->UsePaddleInferBackend(); - } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") { - option->UseTrtBackend(); - if (FLAGS_backend == "paddle_trt") { - option->EnablePaddleToTrt(); - } - if (FLAGS_use_fp16) { - option->EnableTrtFP16(); - } - } else if (FLAGS_backend == "default") { - return true; - } else { - std::cout << "While inference with GPU, only support " - "default/ort/paddle/trt/paddle_trt now, " - << FLAGS_backend << " is not supported." << std::endl; - return false; - } - } else if (FLAGS_device == "cpu") { - option->SetCpuThreadNum(FLAGS_cpu_thread_nums); - if (FLAGS_backend == "ort") { - option->UseOrtBackend(); - } else if (FLAGS_backend == "ov") { - option->UseOpenVINOBackend(); - } else if (FLAGS_backend == "paddle") { - option->UsePaddleInferBackend(); - } else if (FLAGS_backend == "lite") { - option->UsePaddleLiteBackend(); - if (FLAGS_use_fp16) { - option->EnableLiteFP16(); - } - } else if (FLAGS_backend == "default") { - return true; - } else { - std::cout << "While inference with CPU, only support " - "default/ort/ov/paddle/lite now, " - << FLAGS_backend << " is not supported." << std::endl; - return false; - } - } else if (FLAGS_device == "xpu") { - option->UseKunlunXin(FLAGS_device_id); - if (FLAGS_backend == "ort") { - option->UseOrtBackend(); - } else if (FLAGS_backend == "paddle") { - option->UsePaddleInferBackend(); - } else if (FLAGS_backend == "lite") { - option->UsePaddleLiteBackend(); - if (FLAGS_use_fp16) { - option->EnableLiteFP16(); - } - } else if (FLAGS_backend == "default") { - return true; - } else { - std::cout << "While inference with XPU, only support " - "default/ort/paddle/lite now, " - << FLAGS_backend << " is not supported." << std::endl; - return false; - } - } else { - std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device - << " is not supported." 
<< std::endl; - return false; - } - - return true; -} diff --git a/benchmark/cpp/macros.h b/benchmark/cpp/macros.h new file mode 100755 index 000000000..bebd26e0d --- /dev/null +++ b/benchmark/cpp/macros.h @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "fastdeploy/benchmark/utils.h" +#include "fastdeploy/utils/perf.h" + +#define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC) \ +{ \ + std::cout << "====" << #MODEL_NAME << "====" << std::endl; \ + if (!MODEL_NAME.Initialized()) { \ + std::cerr << "Failed to initialize." << std::endl; \ + return 0; \ + } \ + auto __im__ = cv::imread(FLAGS_image); \ + fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__( \ + FLAGS_sampling_interval, FLAGS_device_id); \ + if (FLAGS_collect_memory_info) { \ + __resource_moniter__.Start(); \ + } \ + if (FLAGS_profile_mode == "runtime") { \ + if (!BENCHMARK_FUNC) { \ + std::cerr << "Failed to predict." << std::endl; \ + return 0; \ + } \ + double __profile_time__ = MODEL_NAME.GetProfileTime() * 1000; \ + std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \ + } else { \ + std::cout << "Warmup " << FLAGS_warmup << " times..." << std::endl; \ + for (int __i__ = 0; __i__ < FLAGS_warmup; __i__++) { \ + if (!BENCHMARK_FUNC) { \ + std::cerr << "Failed to predict." << std::endl; \ + return 0; \ + } \ + } \ + std::cout << "Counting time..." << std::endl; \ + std::cout << "Repeat " << FLAGS_repeat << " times..." << std::endl; \ + fastdeploy::TimeCounter __tc__; \ + __tc__.Start(); \ + for (int __i__ = 0; __i__ < FLAGS_repeat; __i__++) { \ + if (!BENCHMARK_FUNC) { \ + std::cerr << "Failed to predict." << std::endl; \ + return 0; \ + } \ + } \ + __tc__.End(); \ + double __end2end__ = __tc__.Duration() / FLAGS_repeat * 1000; \ + std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl; \ + } \ + if (FLAGS_collect_memory_info) { \ + float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \ + float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \ + float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \ + std::cout << "cpu_pss_mb: " << __cpu_mem__ << "MB." << std::endl; \ + std::cout << "gpu_pss_mb: " << __gpu_mem__ << "MB." << std::endl; \ + std::cout << "gpu_util: " << __gpu_util__ << std::endl; \ + __resource_moniter__.Stop(); \ + } \ +} diff --git a/benchmark/cpp/option.h b/benchmark/cpp/option.h new file mode 100755 index 000000000..9989255e5 --- /dev/null +++ b/benchmark/cpp/option.h @@ -0,0 +1,92 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/vision.h" + +static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { + if (FLAGS_profile_mode == "runtime") { + option->EnableProfiling(FLAGS_include_h2d_d2h, FLAGS_repeat, FLAGS_warmup); + } + if (FLAGS_device == "gpu") { + option->UseGpu(FLAGS_device_id); + if (FLAGS_backend == "ort") { + option->UseOrtBackend(); + } else if (FLAGS_backend == "paddle") { + option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") { + option->UseTrtBackend(); + if (FLAGS_backend == "paddle_trt") { + option->EnablePaddleToTrt(); + } + if (FLAGS_use_fp16) { + option->EnableTrtFP16(); + } + } else if (FLAGS_backend == "default") { + return true; + } else { + std::cout << "While inference with GPU, only support " + "default/ort/paddle/trt/paddle_trt now, " + << FLAGS_backend << " is not supported." << std::endl; + return false; + } + } else if (FLAGS_device == "cpu") { + option->SetCpuThreadNum(FLAGS_cpu_thread_nums); + if (FLAGS_backend == "ort") { + option->UseOrtBackend(); + } else if (FLAGS_backend == "ov") { + option->UseOpenVINOBackend(); + } else if (FLAGS_backend == "paddle") { + option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "lite") { + option->UsePaddleLiteBackend(); + if (FLAGS_use_fp16) { + option->EnableLiteFP16(); + } + } else if (FLAGS_backend == "default") { + return true; + } else { + std::cout << "While inference with CPU, only support " + "default/ort/ov/paddle/lite now, " + << FLAGS_backend << " is not supported." << std::endl; + return false; + } + } else if (FLAGS_device == "xpu") { + option->UseKunlunXin(FLAGS_device_id); + if (FLAGS_backend == "ort") { + option->UseOrtBackend(); + } else if (FLAGS_backend == "paddle") { + option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "lite") { + option->UsePaddleLiteBackend(); + if (FLAGS_use_fp16) { + option->EnableLiteFP16(); + } + } else if (FLAGS_backend == "default") { + return true; + } else { + std::cout << "While inference with XPU, only support " + "default/ort/paddle/lite now, " + << FLAGS_backend << " is not supported." << std::endl; + return false; + } + } else { + std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device + << " is not supported." << std::endl; + return false; + } + + return true; +}
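
Note (not part of the patch): with these changes, a new C++ benchmark target only needs to include flags.h, macros.h and option.h and wrap the model's Predict call in BENCHMARK_MODEL; the macro handles the initialization check, warmup/repeat loops, runtime profiling, and the optional CPU/GPU memory collection driven by the gflags defined in flags.h. The sketch below mirrors the refactored benchmark_ppyolov8.cc/benchmark_yolov5.cc from this patch; the file name benchmark_example.cc is hypothetical and the path separator is simplified to '/'.

// benchmark_example.cc -- hypothetical usage sketch modeled on this patch,
// not part of the diff above.
#include "flags.h"
#include "macros.h"
#include "option.h"

int main(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  auto im = cv::imread(FLAGS_image);
  // Build the runtime option from --device/--backend/--profile_mode, etc.
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option)) {
    PrintUsage();
    return -1;
  }
  // Any FastDeploy vision model can be plugged in here; PaddleYOLOv8 is shown
  // because it appears in this patch. The real benchmarks choose '\\' or '/'
  // as the separator depending on the platform.
  auto model = fastdeploy::vision::detection::PaddleYOLOv8(
      FLAGS_model + "/model.pdmodel", FLAGS_model + "/model.pdiparams",
      FLAGS_model + "/infer_cfg.yml", option);
  fastdeploy::vision::DetectionResult res;
  // Runs the warmup/repeat loops (or runtime profiling) and prints
  // Runtime(ms)/End2End(ms), plus memory stats when --collect_memory_info=true.
  BENCHMARK_MODEL(model, model.Predict(im, &res))
  auto vis_im = fastdeploy::vision::VisDetection(im, res);
  cv::imwrite("vis_result.jpg", vis_im);
  return 0;
}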