Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Benchmark] Add ResourceUsageMonitor to collect memory info (#1269)
* add GPL license
* add GPL-3.0 license
* add GPL-3.0 license
* add GPL-3.0 license
* support yolov8
* add pybind for yolov8
* add yolov8 readme
* add cpp benchmark
* add cpu and gpu mem
* public part split
* add runtime mode
* fixed bugs
* add cpu_thread_nums
* deal with comments
* deal with comments
* deal with comments
* rm useless code
* add FASTDEPLOY_DECL
* add FASTDEPLOY_DECL
* fixed for windows
* mv rss to pss
* mv rss to pss
* Update utils.cc
* use thread to collect mem
* Add ResourceUsageMonitor
* rm useless code
* fixed bug
* fixed typo
* update ResourceUsageMonitor
* fixed bug
* fixed bug
* add note for ResourceUsageMonitor
* deal with comments

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
benchmark/cpp/benchmark_yolov5.cc (53 changes, Normal file → Executable file)
@@ -17,8 +17,7 @@
 #include "flags.h"
 
 bool RunModel(std::string model_file, std::string image_file, size_t warmup,
-              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
-              std::string gpu_mem_file_name) {
+              size_t repeats, size_t sampling_interval) {
   // Initialization
   auto option = fastdeploy::RuntimeOption();
   if (!CreateRuntimeOption(&option)) {
@@ -34,6 +33,12 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
     return false;
   }
   auto im = cv::imread(image_file);
+  // For collect memory info
+  fastdeploy::benchmark::ResourceUsageMonitor resource_moniter(
+      sampling_interval, FLAGS_device_id);
+  if (FLAGS_collect_memory_info) {
+    resource_moniter.Start();
+  }
   // For Runtime
   if (FLAGS_profile_mode == "runtime") {
     fastdeploy::vision::DetectionResult res;
@@ -57,35 +62,34 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
       return false;
     }
   }
-  std::vector<float> end2end_statis;
   // Step2: repeat for repeats times
-  std::cout << "Counting time..." << std::endl;
+  fastdeploy::TimeCounter tc;
   std::cout << "Repeat " << repeats << " times..." << std::endl;
   fastdeploy::vision::DetectionResult res;
-  for (int i = 0; i < repeats; i++) {
-    if (FLAGS_collect_memory_info && i % dump_period == 0) {
-      fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
-#if defined(WITH_GPU)
-      fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
-                                                       FLAGS_device_id);
-#endif
-    }
-    fastdeploy::TimeCounter tc;
-    tc.Start();
+  tc.Start();
+  for (int i = 0; i < repeats; i++) {
     if (!model.Predict(im, &res)) {
       std::cerr << "Failed to predict." << std::endl;
       return false;
     }
-    tc.End();
-    end2end_statis.push_back(tc.Duration() * 1000);
   }
-  float end2end = std::accumulate(end2end_statis.end() - repeats,
-                                  end2end_statis.end(), 0.f) /
-                  repeats;
+  tc.End();
+  double end2end = tc.Duration() / repeats * 1000;
   std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
   auto vis_im = fastdeploy::vision::VisDetection(im, res);
   cv::imwrite("vis_result.jpg", vis_im);
   std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
   }
+  if (FLAGS_collect_memory_info) {
+    float cpu_mem = resource_moniter.GetMaxCpuMem();
+    float gpu_mem = resource_moniter.GetMaxGpuMem();
+    float gpu_util = resource_moniter.GetMaxGpuUtil();
+    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+    std::cout << "gpu_util: " << gpu_util << std::endl;
+    resource_moniter.Stop();
+  }
 
   return true;
 }
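Note the methodology change in this hunk: instead of pushing per-iteration durations into end2end_statis and averaging with std::accumulate, the new code starts a single TimeCounter before the loop and divides the total duration by the repeat count (e.g. 1000 repeats measured at 12.5 s total report 12.5 / 1000 * 1000 = 12.5 ms End2End). It also moves memory sampling off the hot loop: the periodic dump_period file dumps are gone, replaced by the monitor's background thread.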
@@ -94,21 +98,10 @@ int main(int argc, char* argv[]) {
   google::ParseCommandLineFlags(&argc, &argv, true);
   int repeats = FLAGS_repeat;
   int warmup = FLAGS_warmup;
-  int dump_period = FLAGS_dump_period;
-  std::string cpu_mem_file_name = "result_cpu.txt";
-  std::string gpu_mem_file_name = "result_gpu.txt";
+  int sampling_interval = FLAGS_sampling_interval;
   // Run model
-  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
-               cpu_mem_file_name, gpu_mem_file_name) != true) {
+  if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) {
     exit(1);
   }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-#if defined(WITH_GPU)
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
-#endif
-  }
   return 0;
 }
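Taken together, the benchmark-side usage the diff introduces boils down to the following sketch (condensed from the changes above; flag parsing, warmup, and error handling omitted, names as in the diff):

    fastdeploy::benchmark::ResourceUsageMonitor resource_moniter(
        FLAGS_sampling_interval, FLAGS_device_id);
    if (FLAGS_collect_memory_info) resource_moniter.Start();
    // ... warmup + timed model.Predict() loop ...
    if (FLAGS_collect_memory_info) {
      std::cout << "cpu_pss_mb: " << resource_moniter.GetMaxCpuMem() << "MB." << std::endl;
      std::cout << "gpu_pss_mb: " << resource_moniter.GetMaxGpuMem() << "MB." << std::endl;
      std::cout << "gpu_util: " << resource_moniter.GetMaxGpuUtil() << std::endl;
      resource_moniter.Stop();
    }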
benchmark/cpp/flags.h

@@ -37,7 +37,7 @@ DEFINE_bool(
             "and 'lite' backend");
 DEFINE_bool(
     collect_memory_info, false, "Whether to collect memory info");
-DEFINE_int32(dump_period, 100, "How often to collect memory info.");
+DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");
 
 void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path --device "
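Note the semantics of the flag swap: dump_period was a dump-every-N-iterations counter, while sampling_interval is a period in milliseconds for the monitor thread. A memory-profiling run might therefore be invoked roughly as follows (hypothetical binary name and model/image paths):

    ./benchmark_yolov5 --model yolov5s.onnx --image test.jpg --device gpu --collect_memory_info=true --sampling_interval 50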
fastdeploy/benchmark/utils.cc

@@ -36,86 +36,131 @@ static std::string strip(const std::string& str, char ch = ' ') {
   return str.substr(i, j + 1 - i);
 }
 
-void DumpCurrentCpuMemoryUsage(const std::string& name) {
+// Split string
+static void split(const std::string& s, std::vector<std::string>& tokens,
+                  char delim = ' ') {
+  tokens.clear();
+  size_t lastPos = s.find_first_not_of(delim, 0);
+  size_t pos = s.find(delim, lastPos);
+  while (lastPos != std::string::npos) {
+    tokens.emplace_back(s.substr(lastPos, pos - lastPos));
+    lastPos = s.find_first_not_of(delim, pos);
+    pos = s.find(delim, lastPos);
+  }
+  return;
+}
+
+ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
+    : is_supported_(false),
+      sampling_interval_(sampling_interval_ms),
+      gpu_id_(gpu_id) {
+#if defined(__linux__) || defined(__ANDROID__)
+  is_supported_ = true;
+#else
+  is_supported_ = false;
+#endif
+  if (!is_supported_) {
+    FDASSERT(false,
+             "Currently ResourceUsageMonitor only supports Linux and ANDROID.")
+    return;
+  }
+}
+
+void ResourceUsageMonitor::Start() {
+  if (!is_supported_) return;
+  if (check_memory_thd_ != nullptr) {
+    FDINFO << "Memory monitoring has already started!" << std::endl;
+    return;
+  }
+  FDINFO << "Start monitoring memory!" << std::endl;
+  stop_signal_ = false;
+  check_memory_thd_.reset(new std::thread(([this]() {
+    // Note we retrieve the memory usage at the very beginning of the thread.
+    while (true) {
+      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
+      // get max_cpu_mem
+      std::vector<std::string> cpu_tokens;
+      split(cpu_mem_info, cpu_tokens, ' ');
+      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#if defined(WITH_GPU)
+      std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
+      // get max_gpu_mem and max_gpu_util
+      std::vector<std::string> gpu_tokens;
+      split(gpu_mem_info, gpu_tokens, ',');
+      max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
+      max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
+#endif
+      if (stop_signal_) break;
+      std::this_thread::sleep_for(
+          std::chrono::milliseconds(sampling_interval_));
+    }
+  })));
+}
+
+void ResourceUsageMonitor::Stop() {
+  if (!is_supported_) {
+    return;
+  }
+  if (check_memory_thd_ == nullptr) {
+    FDINFO << "Memory monitoring hasn't started yet or has stopped!"
+           << std::endl;
+    return;
+  }
+  FDINFO << "Stop monitoring memory!" << std::endl;
+  StopInternal();
+}
+
+void ResourceUsageMonitor::StopInternal() {
+  stop_signal_ = true;
+  if (check_memory_thd_ == nullptr) {
+    return;
+  }
+  if (check_memory_thd_ != nullptr) {
+    check_memory_thd_->join();
+  }
+  check_memory_thd_.reset(nullptr);
+}
+
+std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
+  std::string result = "";
 #if defined(__linux__) || defined(__ANDROID__)
   int iPid = static_cast<int>(getpid());
   std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
 
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect cpu memory info only supports Linux and ANDROID.")
 #endif
-  return;
+  return result;
 }
 
-void DumpCurrentGpuMemoryUsage(const std::string& name, int device_id) {
+std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
+  std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
   std::string command = "nvidia-smi --id=" + std::to_string(device_id) +
                         " --query-gpu=index,uuid,name,timestamp,memory.total,"
                         "memory.free,memory.used,utilization.gpu,utilization."
                         "memory --format=csv,noheader,nounits";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
 
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect gpu memory info only supports Linux in GPU.")
 #endif
-  return;
-}
-
-float GetCpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_cpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ' ')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_cpu_mem = std::max(max_cpu_mem, stof(nums[3]));
-  }
-  return max_cpu_mem / 1024;
-}
-
-float GetGpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_gpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ',')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_gpu_mem = std::max(max_gpu_mem, stof(nums[6]));
-  }
-  return max_gpu_mem;
+  return result;
 }
 
 } // namespace benchmark
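The two GetCurrent*MemoryInfo() helpers shell out and parse text, so the token indices hinge on fixed output layouts: on typical procps pmap, the `total` row of `pmap -x` reads `total kB <Kbytes> <RSS> <Dirty>`, so cpu_tokens[3] is RSS in KiB (hence the division by 1024 to get MB), and the comma-separated nvidia-smi query above puts memory.used at index 6 and utilization.gpu at index 7. A minimal standalone sketch of the CPU side (assuming Linux with pmap installed; illustration only, not FastDeploy API):

    #include <unistd.h>   // getpid
    #include <cstdio>     // popen, fgets, pclose
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
      // Same command the monitor runs, against our own pid.
      std::string cmd = "pmap -x " + std::to_string(getpid()) + " | grep total";
      FILE* pp = popen(cmd.c_str(), "r");
      if (!pp) return 1;
      char buf[1024];
      std::string line;
      while (fgets(buf, sizeof(buf), pp) != nullptr) line += buf;
      pclose(pp);
      // Whitespace-split; token 3 is the RSS column of the `total` row.
      std::istringstream ss(line);
      std::vector<std::string> tokens;
      for (std::string t; ss >> t;) tokens.push_back(t);
      if (tokens.size() > 3)
        std::cout << "rss_mb: " << std::stof(tokens[3]) / 1024 << std::endl;
      return 0;
    }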
fastdeploy/benchmark/utils.h

@@ -13,23 +13,72 @@
 // limitations under the License.
 #pragma once
 
+#include <memory>
+#include <thread>  // NOLINT
 #include "fastdeploy/utils/utils.h"
 
 namespace fastdeploy {
 namespace benchmark {
+/*! @brief ResourceUsageMonitor object used when to collect memory info.
+ */
+class FASTDEPLOY_DECL ResourceUsageMonitor {
+ public:
+  /** \brief Set sampling_interval_ms and gpu_id for ResourceUsageMonitor.
+   *
+   * \param[in] sampling_interval_ms How often to collect memory info(ms).
+   * \param[in] gpu_id Device(gpu) id, default 0.
+   */
+  explicit ResourceUsageMonitor(int sampling_interval_ms, int gpu_id = 0);
 
-// Record current cpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentCpuMemoryUsage(const std::string& name);
+  ~ResourceUsageMonitor() { StopInternal(); }
 
-// Record current gpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentGpuMemoryUsage(const std::string& name,
-                                               int device_id);
+  /// Start memory info collect
+  void Start();
+  /// Stop memory info collect
+  void Stop();
+  /// Get maximum cpu memory usage
+  float GetMaxCpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_cpu_mem_;
+  }
+  /// Get maximum gpu memory usage
+  float GetMaxGpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_mem_;
+  }
+  /// Get maximum gpu util
+  float GetMaxGpuUtil() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_util_;
+  }
 
-// Get Max cpu memory usage
-FASTDEPLOY_DECL float GetCpuMemoryUsage(const std::string& name);
+  ResourceUsageMonitor(ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor(ResourceUsageMonitor&&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&&) = delete;
 
-// Get Max gpu memory usage
-FASTDEPLOY_DECL float GetGpuMemoryUsage(const std::string& name);
+ private:
+  void StopInternal();
+  // Get current cpu memory info
+  std::string GetCurrentCpuMemoryInfo();
+  // Get current gpu memory info
+  std::string GetCurrentGpuMemoryInfo(int device_id);
+
+  bool is_supported_ = false;
+  bool stop_signal_ = false;
+  const int sampling_interval_;
+  float max_cpu_mem_ = 0.0f;
+  float max_gpu_mem_ = 0.0f;
+  float max_gpu_util_ = 0.0f;
+  const int gpu_id_ = 0;
+  std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
+};
 
 } // namespace benchmark
 } // namespace fastdeploy
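The header spells out the lifecycle contract the implementation relies on: the getters return -1.0f unless Start() has created the sampling thread, copy and move are deleted because the thread lambda captures this, and the destructor calls StopInternal() so the thread is always joined. A generic sketch of that lifecycle, as an illustration rather than the FastDeploy class (it swaps the plain bool stop flag above for std::atomic<bool>):

    #include <atomic>
    #include <chrono>
    #include <iostream>
    #include <memory>
    #include <thread>

    // Start() spawns one background sampling thread; Stop() (also called from
    // the destructor) signals it and joins, mirroring ResourceUsageMonitor.
    class Sampler {
     public:
      explicit Sampler(int interval_ms) : interval_ms_(interval_ms) {}
      ~Sampler() { Stop(); }
      void Start() {
        if (thd_) return;  // already running
        stop_ = false;
        thd_.reset(new std::thread([this]() {
          while (!stop_) {
            // ... sample here and fold into a running max ...
            std::this_thread::sleep_for(
                std::chrono::milliseconds(interval_ms_));
          }
        }));
      }
      void Stop() {
        stop_ = true;
        if (thd_) {
          thd_->join();
          thd_.reset();
        }
      }
     private:
      const int interval_ms_;
      std::atomic<bool> stop_{false};
      std::unique_ptr<std::thread> thd_;
    };

    int main() {
      Sampler s(50);        // sample every 50 ms
      s.Start();
      std::this_thread::sleep_for(std::chrono::milliseconds(200));
      s.Stop();
      std::cout << "done" << std::endl;
      return 0;
    }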
fastdeploy/runtime/runtime_option.h (1 change, Normal file → Executable file)
@@ -198,7 +198,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
 
   // *** The belowing api are deprecated, will be removed in v1.2.0
   // *** Do not use it anymore
 
   void SetPaddleMKLDNN(bool pd_mkldnn = true);
   void EnablePaddleToTrt();
   void DeletePaddleBackendPass(const std::string& delete_pass_name);
@@ -56,7 +56,7 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   float GetNMSThreshold() const { return nms_threshold_; }
 
   /// Set height and weight
-  void SetHeightAndWeight(int height,int width) {
+  void SetHeightAndWeight(int height, int width) {
     height_ = height;
     width_ = width;
   }