[Benchmark] Remove pmap and use mem api (#1309)

* add GPL license * add GPL-3.0 license * add GPL-3.0 license * add GPL-3.0 license * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint * rm pmap and use mem api * rm pmap and use mem api * add mem api * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * deal with comments --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
2025-10-06 00:57:33 +08:00 · 2023-02-14 15:00:05 +08:00
parent abae858f09
commit 2dfda1db85
6 changed files with 74 additions and 51 deletions
--- a/benchmark/cpp/benchmark_ppyolov8.cc
+++ b/benchmark/cpp/benchmark_ppyolov8.cc
@@ -12,16 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "flags.h"
 #include "macros.h"
+#include "flags.h"
 #include "option.h"

-#ifdef WIN32
-const char sep = '\\';
-#else
-const char sep = '/';
-#endif
-
 int main(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  auto im = cv::imread(FLAGS_image);
@@ -31,6 +25,7 @@ int main(int argc, char* argv[]) {
    PrintUsage();
    return false;
  }
+  PrintBenchmarkInfo();
  auto model_file = FLAGS_model + sep + "model.pdmodel";
  auto params_file = FLAGS_model + sep + "model.pdiparams";
  auto config_file = FLAGS_model + sep + "infer_cfg.yml";
--- a/benchmark/cpp/benchmark_yolov5.cc
+++ b/benchmark/cpp/benchmark_yolov5.cc
@@ -12,8 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "flags.h"
 #include "macros.h"
+#include "flags.h"
 #include "option.h"

 int main(int argc, char* argv[]) {
@@ -25,6 +25,7 @@ int main(int argc, char* argv[]) {
    PrintUsage();
    return false;
  }
+  PrintBenchmarkInfo();
  auto model_yolov5 =
      fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option);
  fastdeploy::vision::DetectionResult res;
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -16,6 +16,12 @@

 #include "gflags/gflags.h"

+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
 DEFINE_string(model, "", "Directory of the inference model.");
 DEFINE_string(image, "", "Path of the image file.");
 DEFINE_string(device, "cpu",
@@ -48,3 +54,35 @@ void PrintUsage() {
  std::cout << "Default value of backend: default" << std::endl;
  std::cout << "Default value of use_fp16: false" << std::endl;
 }
+
+// Print a summary of the benchmark flags (model, backend, memory) to stdout.
+void PrintBenchmarkInfo() {
+  // Model name: last sep-separated token of --model, or the raw flag if none
+  std::vector<std::string> model_names;
+  fastdeploy::benchmark::Split(FLAGS_model, model_names, sep);
+  std::stringstream ss;
+  ss.precision(3);
+  ss << "\n======= Model Info =======\n";
+  ss << "model_name: "
+     << (model_names.empty() ? FLAGS_model : model_names.back()) << std::endl;
+  ss << "profile_mode: " << FLAGS_profile_mode << std::endl;
+  if (FLAGS_profile_mode == "runtime") {
+    ss << "include_h2d_d2h: " << FLAGS_include_h2d_d2h << std::endl;
+  }
+  ss << "\n======= Backend Info =======\n";
+  ss << "warmup: " << FLAGS_warmup << std::endl;
+  ss << "repeats: " << FLAGS_repeat << std::endl;
+  ss << "device: " << FLAGS_device << std::endl;
+  if (FLAGS_device == "gpu") {
+    ss << "device_id: " << FLAGS_device_id << std::endl;
+  }
+  ss << "backend: " << FLAGS_backend << std::endl;
+  ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl;
+  ss << "use_fp16: " << FLAGS_use_fp16 << std::endl;
+  ss << "collect_memory_info: " << FLAGS_collect_memory_info << std::endl;
+  if (FLAGS_collect_memory_info) {
+    ss << "sampling_interval: " << std::to_string(FLAGS_sampling_interval)
+       << "ms" << std::endl;
+  }
+  std::cout << ss.str() << std::endl;
+}
--- a/benchmark/cpp/macros.h
+++ b/benchmark/cpp/macros.h
@@ -18,7 +18,6 @@

 #define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC)                         \
 {                                                                           \
-  std::cout << "====" << #MODEL_NAME << "====" << std::endl;                \
  if (!MODEL_NAME.Initialized()) {                                          \
    std::cerr << "Failed to initialize." << std::endl;                      \
    return 0;                                                               \
@@ -62,8 +61,8 @@
    float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem();                \
    float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem();                \
    float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil();              \
-    std::cout << "cpu_pss_mb: " << __cpu_mem__ << "MB." << std::endl;       \
-    std::cout << "gpu_pss_mb: " << __gpu_mem__ << "MB." << std::endl;       \
+    std::cout << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl;       \
+    std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl;       \
    std::cout << "gpu_util: " << __gpu_util__ << std::endl;                 \
    __resource_moniter__.Stop();                                            \
  }                                                                         \
--- a/fastdeploy/benchmark/utils.cc
+++ b/fastdeploy/benchmark/utils.cc
@@ -13,8 +13,8 @@
 // limitations under the License.

 #include <sys/types.h>
-#if defined(__linux__) || defined(__ANDROID__)
-#include <unistd.h>
+#ifdef __linux__
+#include <sys/resource.h>
 #endif
 #include <cmath>

@@ -23,8 +23,7 @@
 namespace fastdeploy {
 namespace benchmark {

-// Remove the ch characters at both ends of str
-static std::string strip(const std::string& str, char ch = ' ') {
+std::string Strip(const std::string& str, char ch) {
  int i = 0;
  while (str[i] == ch) {
    i++;
@@ -36,9 +35,8 @@ static std::string strip(const std::string& str, char ch = ' ') {
  return str.substr(i, j + 1 - i);
 }

-// Split string
-static void split(const std::string& s, std::vector<std::string>& tokens,
-                  char delim = ' ') {
+void Split(const std::string& s, std::vector<std::string>& tokens,
+           char delim) {
  tokens.clear();
  size_t lastPos = s.find_first_not_of(delim, 0);
  size_t pos = s.find(delim, lastPos);
@@ -54,7 +52,7 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
    : is_supported_(false),
      sampling_interval_(sampling_interval_ms),
      gpu_id_(gpu_id) {
-#if defined(__linux__) || defined(__ANDROID__)
+#ifdef __linux__
  is_supported_ = true;
 #else
  is_supported_ = false;
@@ -67,7 +65,9 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
 }

 void ResourceUsageMonitor::Start() {
-  if (!is_supported_) return;
+  if (!is_supported_) {
+    return;
+  }
  if (check_memory_thd_ != nullptr) {
    FDINFO << "Memory monitoring has already started!" << std::endl;
    return;
@@ -77,20 +77,24 @@ void ResourceUsageMonitor::Start() {
  check_memory_thd_.reset(new std::thread(([this]() {
    // Note we retrieve the memory usage at the very beginning of the thread.
    while (true) {
-      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
-      // get max_cpu_mem
-      std::vector<std::string> cpu_tokens;
-      split(cpu_mem_info, cpu_tokens, ' ');
-      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#ifdef __linux__
+      rusage res;
+      if (getrusage(RUSAGE_SELF, &res) == 0) {
+        max_cpu_mem_ =
+            std::max(max_cpu_mem_, static_cast<float>(res.ru_maxrss / 1024.0));
+      }
+#endif
 #if defined(WITH_GPU)
      std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
      // get max_gpu_mem and max_gpu_util
      std::vector<std::string> gpu_tokens;
-      split(gpu_mem_info, gpu_tokens, ',');
+      Split(gpu_mem_info, gpu_tokens, ',');
      max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
      max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
 #endif
-      if (stop_signal_) break;
+      if (stop_signal_) {
+        break;
+      }
      std::this_thread::sleep_for(
          std::chrono::milliseconds(sampling_interval_));
    }
@@ -121,26 +125,6 @@ void ResourceUsageMonitor::StopInternal() {
  check_memory_thd_.reset(nullptr);
 }

-std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
-  std::string result = "";
-#if defined(__linux__) || defined(__ANDROID__)
-  int iPid = static_cast<int>(getpid());
-  std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
-  FILE* pp = popen(command.data(), "r");
-  if (!pp) return "";
-  char tmp[1024];
-
-  while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    result += tmp;
-  }
-  pclose(pp);
-#else
-  FDASSERT(false,
-           "Currently collect cpu memory info only supports Linux and ANDROID.")
-#endif
-  return result;
-}
-
 std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
  std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
--- a/fastdeploy/benchmark/utils.h
+++ b/fastdeploy/benchmark/utils.h
@@ -65,20 +65,26 @@ class FASTDEPLOY_DECL ResourceUsageMonitor {

 private:
  void StopInternal();
-  // Get current cpu memory info
-  std::string GetCurrentCpuMemoryInfo();
  // Get current gpu memory info
  std::string GetCurrentGpuMemoryInfo(int device_id);

  bool is_supported_ = false;
  bool stop_signal_ = false;
  const int sampling_interval_;
-  float max_cpu_mem_ = 0.0f;
-  float max_gpu_mem_ = 0.0f;
+  float max_cpu_mem_ = 0.0f;  // MB
+  float max_gpu_mem_ = 0.0f;  // MB
  float max_gpu_util_ = 0.0f;
  const int gpu_id_ = 0;
  std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
 };

+// Return a copy of str with the ch characters at both ends removed
+FASTDEPLOY_DECL std::string Strip(const std::string& str, char ch = ' ');
+
+// Split s on delim into tokens; tokens is cleared before being filled
+FASTDEPLOY_DECL void Split(const std::string& s,
+                           std::vector<std::string>& tokens,
+                           char delim = ' ');
+
 }  // namespace benchmark
 }  // namespace fastdeploy