From 2dfda1db85fdf9adf131f7fe82af4ac9a76fd9e7 Mon Sep 17 00:00:00 2001 From: WJJ1995 Date: Tue, 14 Feb 2023 15:00:05 +0800 Subject: [PATCH] [Benchmark] Remove pmap and use mem api (#1309) * add GPL license * add GPL-3.0 license * add GPL-3.0 license * add GPL-3.0 license * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint * rm pmap and use mem api * rm pmap and use mem api * add mem api * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * deal with comments --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com> --- benchmark/cpp/benchmark_ppyolov8.cc | 9 ++--- benchmark/cpp/benchmark_yolov5.cc | 3 +- benchmark/cpp/flags.h | 38 ++++++++++++++++++++ benchmark/cpp/macros.h | 5 ++- fastdeploy/benchmark/utils.cc | 56 +++++++++++------------------ fastdeploy/benchmark/utils.h | 14 +++++--- 6 files changed, 74 insertions(+), 51 deletions(-) mode change 100644 => 100755 benchmark/cpp/benchmark_ppyolov8.cc mode change 100644 => 100755 benchmark/cpp/benchmark_yolov5.cc diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc old mode 100644 new mode 100755 index 545474635..5541696d5 --- a/benchmark/cpp/benchmark_ppyolov8.cc +++ b/benchmark/cpp/benchmark_ppyolov8.cc @@ -12,16 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the
License. -#include "flags.h" #include "macros.h" +#include "flags.h" #include "option.h" -#ifdef WIN32 -const char sep = '\\'; -#else -const char sep = '/'; -#endif - int main(int argc, char* argv[]) { google::ParseCommandLineFlags(&argc, &argv, true); auto im = cv::imread(FLAGS_image); @@ -31,6 +25,7 @@ int main(int argc, char* argv[]) { PrintUsage(); return false; } + PrintBenchmarkInfo(); auto model_file = FLAGS_model + sep + "model.pdmodel"; auto params_file = FLAGS_model + sep + "model.pdiparams"; auto config_file = FLAGS_model + sep + "infer_cfg.yml"; diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100644 new mode 100755 index 5b2cab855..6ab3c5990 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "flags.h" #include "macros.h" +#include "flags.h" #include "option.h" int main(int argc, char* argv[]) { @@ -25,6 +25,7 @@ int main(int argc, char* argv[]) { PrintUsage(); return false; } + PrintBenchmarkInfo(); auto model_yolov5 = fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); fastdeploy::vision::DetectionResult res; diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h index 7f8c3a29f..fd20e685c 100755 --- a/benchmark/cpp/flags.h +++ b/benchmark/cpp/flags.h @@ -16,6 +16,12 @@ #include "gflags/gflags.h" +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + DEFINE_string(model, "", "Directory of the inference model."); DEFINE_string(image, "", "Path of the image file."); DEFINE_string(device, "cpu", @@ -48,3 +54,35 @@ void PrintUsage() { std::cout << "Default value of backend: default" << std::endl; std::cout << "Default value of use_fp16: false" << std::endl; } + +void PrintBenchmarkInfo() { + // Get model name + std::vector model_names; + fastdeploy::benchmark::Split(FLAGS_model, model_names, sep); + // Save 
benchmark info + std::stringstream ss; + ss.precision(3); + ss << "\n======= Model Info =======\n"; + ss << "model_name: " << model_names[model_names.size() - 1] << std::endl; + ss << "profile_mode: " << FLAGS_profile_mode << std::endl; + if (FLAGS_profile_mode == "runtime") { + ss << "include_h2d_d2h: " << FLAGS_include_h2d_d2h << std::endl; + } + ss << "\n======= Backend Info =======\n"; + ss << "warmup: " << FLAGS_warmup << std::endl; + ss << "repeats: " << FLAGS_repeat << std::endl; + ss << "device: " << FLAGS_device << std::endl; + if (FLAGS_device == "gpu") { + ss << "device_id: " << FLAGS_device_id << std::endl; + } + ss << "backend: " << FLAGS_backend << std::endl; + ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl; + ss << "use_fp16: " << FLAGS_use_fp16 << std::endl; + ss << "collect_memory_info: " << FLAGS_collect_memory_info << std::endl; + if (FLAGS_collect_memory_info) { + ss << "sampling_interval: " << std::to_string(FLAGS_sampling_interval) + << "ms" << std::endl; + } + std::cout << ss.str() << std::endl; + return; +} diff --git a/benchmark/cpp/macros.h b/benchmark/cpp/macros.h index bebd26e0d..77df0c657 100755 --- a/benchmark/cpp/macros.h +++ b/benchmark/cpp/macros.h @@ -18,7 +18,6 @@ #define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC) \ { \ - std::cout << "====" << #MODEL_NAME << "====" << std::endl; \ if (!MODEL_NAME.Initialized()) { \ std::cerr << "Failed to initialize." << std::endl; \ return 0; \ @@ -62,8 +61,8 @@ float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \ float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \ float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \ - std::cout << "cpu_pss_mb: " << __cpu_mem__ << "MB." << std::endl; \ - std::cout << "gpu_pss_mb: " << __gpu_mem__ << "MB." << std::endl; \ + std::cout << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl; \ + std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." 
<< std::endl; \ std::cout << "gpu_util: " << __gpu_util__ << std::endl; \ __resource_moniter__.Stop(); \ } \ diff --git a/fastdeploy/benchmark/utils.cc b/fastdeploy/benchmark/utils.cc index a33db1dc2..8857f10c4 100755 --- a/fastdeploy/benchmark/utils.cc +++ b/fastdeploy/benchmark/utils.cc @@ -13,8 +13,8 @@ // limitations under the License. #include -#if defined(__linux__) || defined(__ANDROID__) -#include +#ifdef __linux__ +#include #endif #include @@ -23,8 +23,7 @@ namespace fastdeploy { namespace benchmark { -// Remove the ch characters at both ends of str -static std::string strip(const std::string& str, char ch = ' ') { +std::string Strip(const std::string& str, char ch) { int i = 0; while (str[i] == ch) { i++; @@ -36,9 +35,8 @@ static std::string strip(const std::string& str, char ch = ' ') { return str.substr(i, j + 1 - i); } -// Split string -static void split(const std::string& s, std::vector& tokens, - char delim = ' ') { +void Split(const std::string& s, std::vector& tokens, + char delim) { tokens.clear(); size_t lastPos = s.find_first_not_of(delim, 0); size_t pos = s.find(delim, lastPos); @@ -54,7 +52,7 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id) : is_supported_(false), sampling_interval_(sampling_interval_ms), gpu_id_(gpu_id) { -#if defined(__linux__) || defined(__ANDROID__) +#ifdef __linux__ is_supported_ = true; #else is_supported_ = false; @@ -67,7 +65,9 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id) } void ResourceUsageMonitor::Start() { - if (!is_supported_) return; + if (!is_supported_) { + return; + } if (check_memory_thd_ != nullptr) { FDINFO << "Memory monitoring has already started!" << std::endl; return; @@ -77,20 +77,24 @@ void ResourceUsageMonitor::Start() { check_memory_thd_.reset(new std::thread(([this]() { // Note we retrieve the memory usage at the very beginning of the thread. 
while (true) { - std::string cpu_mem_info = GetCurrentCpuMemoryInfo(); - // get max_cpu_mem - std::vector cpu_tokens; - split(cpu_mem_info, cpu_tokens, ' '); - max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024); +#ifdef __linux__ + rusage res; + if (getrusage(RUSAGE_SELF, &res) == 0) { + max_cpu_mem_ = + std::max(max_cpu_mem_, static_cast(res.ru_maxrss / 1024.0)); + } +#endif #if defined(WITH_GPU) std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_); // get max_gpu_mem and max_gpu_util std::vector gpu_tokens; - split(gpu_mem_info, gpu_tokens, ','); + Split(gpu_mem_info, gpu_tokens, ','); max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6])); max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7])); #endif - if (stop_signal_) break; + if (stop_signal_) { + break; + } std::this_thread::sleep_for( std::chrono::milliseconds(sampling_interval_)); } @@ -121,26 +125,6 @@ void ResourceUsageMonitor::StopInternal() { check_memory_thd_.reset(nullptr); } -std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() { - std::string result = ""; -#if defined(__linux__) || defined(__ANDROID__) - int iPid = static_cast(getpid()); - std::string command = "pmap -x " + std::to_string(iPid) + " | grep total"; - FILE* pp = popen(command.data(), "r"); - if (!pp) return ""; - char tmp[1024]; - - while (fgets(tmp, sizeof(tmp), pp) != NULL) { - result += tmp; - } - pclose(pp); -#else - FDASSERT(false, - "Currently collect cpu memory info only supports Linux and ANDROID.") -#endif - return result; -} - std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) { std::string result = ""; #if defined(__linux__) && defined(WITH_GPU) diff --git a/fastdeploy/benchmark/utils.h b/fastdeploy/benchmark/utils.h index f81cb29c1..4037cd09c 100755 --- a/fastdeploy/benchmark/utils.h +++ b/fastdeploy/benchmark/utils.h @@ -65,20 +65,26 @@ class FASTDEPLOY_DECL ResourceUsageMonitor { private: void StopInternal(); - // Get current cpu memory info - std::string 
GetCurrentCpuMemoryInfo(); // Get current gpu memory info std::string GetCurrentGpuMemoryInfo(int device_id); bool is_supported_ = false; bool stop_signal_ = false; const int sampling_interval_; - float max_cpu_mem_ = 0.0f; - float max_gpu_mem_ = 0.0f; + float max_cpu_mem_ = 0.0f; // MB + float max_gpu_mem_ = 0.0f; // MB float max_gpu_util_ = 0.0f; const int gpu_id_ = 0; std::unique_ptr check_memory_thd_ = nullptr; }; +// Remove the ch characters at both ends of str +FASTDEPLOY_DECL std::string Strip(const std::string& str, char ch = ' '); + +// Split string +FASTDEPLOY_DECL void Split(const std::string& s, + std::vector& tokens, + char delim = ' '); + } // namespace benchmark } // namespace fastdeploy