From 59a4ab343f2d1dfc20861117ad05e483a8695f78 Mon Sep 17 00:00:00 2001
From: WJJ1995
Date: Fri, 10 Feb 2023 17:13:22 +0800
Subject: [PATCH 1/4] [Benchmark] Add ResourceUsageMonitor to collect memory
 info (#1269)

* add GPL license
* add GPL-3.0 license
* add GPL-3.0 license
* add GPL-3.0 license
* support yolov8
* add pybind for yolov8
* add yolov8 readme
* add cpp benchmark
* add cpu and gpu mem
* public part split
* add runtime mode
* fixed bugs
* add cpu_thread_nums
* deal with comments
* deal with comments
* deal with comments
* rm useless code
* add FASTDEPLOY_DECL
* add FASTDEPLOY_DECL
* fixed for windows
* mv rss to pss
* mv rss to pss
* Update utils.cc
* use thread to collect mem
* Add ResourceUsageMonitor
* rm useless code
* fixed bug
* fixed typo
* update ResourceUsageMonitor
* fixed bug
* fixed bug
* add note for ResourceUsageMonitor
* deal with comments

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
---
 benchmark/cpp/benchmark_yolov5.cc             |  53 +++----
 benchmark/cpp/flags.h                         |   2 +-
 fastdeploy/benchmark/utils.cc                 | 145 ++++++++++++------
 fastdeploy/benchmark/utils.h                  |  67 ++++++--
 fastdeploy/runtime/runtime_option.h           |   3 +-
 .../detection/contrib/rknpu2/postprocessor.h  |   2 +-
 6 files changed, 179 insertions(+), 93 deletions(-)
 mode change 100644 => 100755 benchmark/cpp/benchmark_yolov5.cc
 mode change 100644 => 100755 fastdeploy/runtime/runtime_option.h

diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc
old mode 100644
new mode 100755
index ae16dd8d8..2e5df6b1c
--- a/benchmark/cpp/benchmark_yolov5.cc
+++ b/benchmark/cpp/benchmark_yolov5.cc
@@ -17,8 +17,7 @@
 #include "flags.h"
 
 bool RunModel(std::string model_file, std::string image_file, size_t warmup,
-              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
-              std::string gpu_mem_file_name) {
+              size_t repeats, size_t sampling_interval) {
   // Initialization
   auto option = fastdeploy::RuntimeOption();
   if (!CreateRuntimeOption(&option)) {
@@ -34,6 +33,12 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
     return false;
   }
   auto im = cv::imread(image_file);
+  // For collecting memory info
+  fastdeploy::benchmark::ResourceUsageMonitor resource_monitor(
+      sampling_interval, FLAGS_device_id);
+  if (FLAGS_collect_memory_info) {
+    resource_monitor.Start();
+  }
   // For Runtime
   if (FLAGS_profile_mode == "runtime") {
     fastdeploy::vision::DetectionResult res;
@@ -57,35 +62,34 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
         return false;
       }
     }
-    std::vector<float> end2end_statis;
    // Step2: repeat for repeats times
     std::cout << "Counting time..." << std::endl;
-    fastdeploy::TimeCounter tc;
+    std::cout << "Repeat " << repeats << " times..." << std::endl;
     fastdeploy::vision::DetectionResult res;
+    fastdeploy::TimeCounter tc;
+    tc.Start();
     for (int i = 0; i < repeats; i++) {
-      if (FLAGS_collect_memory_info && i % dump_period == 0) {
-        fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
-#if defined(WITH_GPU)
-        fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
-                                                         FLAGS_device_id);
-#endif
-      }
-      tc.Start();
       if (!model.Predict(im, &res)) {
         std::cerr << "Failed to predict." << std::endl;
         return false;
       }
-      tc.End();
-      end2end_statis.push_back(tc.Duration() * 1000);
     }
-    float end2end = std::accumulate(end2end_statis.end() - repeats,
-                                    end2end_statis.end(), 0.f) /
-                    repeats;
+    tc.End();
+    double end2end = tc.Duration() / repeats * 1000;
     std::cout << "End2End(ms): " << end2end << "ms."
               << std::endl;
     auto vis_im = fastdeploy::vision::VisDetection(im, res);
     cv::imwrite("vis_result.jpg", vis_im);
     std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
   }
+  if (FLAGS_collect_memory_info) {
+    float cpu_mem = resource_monitor.GetMaxCpuMem();
+    float gpu_mem = resource_monitor.GetMaxGpuMem();
+    float gpu_util = resource_monitor.GetMaxGpuUtil();
+    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+    std::cout << "gpu_util: " << gpu_util << std::endl;
+    resource_monitor.Stop();
+  }
   return true;
 }
 
@@ -94,21 +98,10 @@ int main(int argc, char* argv[]) {
   google::ParseCommandLineFlags(&argc, &argv, true);
   int repeats = FLAGS_repeat;
   int warmup = FLAGS_warmup;
-  int dump_period = FLAGS_dump_period;
-  std::string cpu_mem_file_name = "result_cpu.txt";
-  std::string gpu_mem_file_name = "result_gpu.txt";
+  int sampling_interval = FLAGS_sampling_interval;
   // Run model
-  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
-               cpu_mem_file_name, gpu_mem_file_name) != true) {
+  if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) {
     exit(1);
   }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-#if defined(WITH_GPU)
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
-#endif
-  }
   return 0;
 }
\ No newline at end of file
diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h
index 6ecf9b33e..64f22c702 100755
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -37,7 +37,7 @@ DEFINE_bool(
     "and 'lite' backend");
 DEFINE_bool(
     collect_memory_info, false, "Whether to collect memory info");
-DEFINE_int32(dump_period, 100, "How often to collect memory info.");
+DEFINE_int32(sampling_interval, 50, "How often to collect memory info (ms).");
 
 void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path --device "
diff --git a/fastdeploy/benchmark/utils.cc b/fastdeploy/benchmark/utils.cc
index 2b0bd9df1..a33db1dc2 100755
--- a/fastdeploy/benchmark/utils.cc
+++ b/fastdeploy/benchmark/utils.cc
@@ -36,86 +36,131 @@ static std::string strip(const std::string& str, char ch = ' ') {
   return str.substr(i, j + 1 - i);
 }
 
-void DumpCurrentCpuMemoryUsage(const std::string& name) {
+// Split string
+static void split(const std::string& s, std::vector<std::string>& tokens,
+                  char delim = ' ') {
+  tokens.clear();
+  size_t lastPos = s.find_first_not_of(delim, 0);
+  size_t pos = s.find(delim, lastPos);
+  while (lastPos != std::string::npos) {
+    tokens.emplace_back(s.substr(lastPos, pos - lastPos));
+    lastPos = s.find_first_not_of(delim, pos);
+    pos = s.find(delim, lastPos);
+  }
+  return;
+}
+
+ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
+    : is_supported_(false),
+      sampling_interval_(sampling_interval_ms),
+      gpu_id_(gpu_id) {
+#if defined(__linux__) || defined(__ANDROID__)
+  is_supported_ = true;
+#else
+  is_supported_ = false;
+#endif
+  if (!is_supported_) {
+    FDASSERT(false,
+             "Currently ResourceUsageMonitor only supports Linux and ANDROID.")
+    return;
+  }
+}
+
+void ResourceUsageMonitor::Start() {
+  if (!is_supported_) return;
+  if (check_memory_thd_ != nullptr) {
+    FDINFO << "Memory monitoring has already started!" << std::endl;
+    return;
+  }
+  FDINFO << "Start monitoring memory!"
+         << std::endl;
+  stop_signal_ = false;
+  check_memory_thd_.reset(new std::thread(([this]() {
+    // Note we retrieve the memory usage at the very beginning of the thread.
+    while (true) {
+      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
+      // get max_cpu_mem
+      std::vector<std::string> cpu_tokens;
+      split(cpu_mem_info, cpu_tokens, ' ');
+      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#if defined(WITH_GPU)
+      std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
+      // get max_gpu_mem and max_gpu_util
+      std::vector<std::string> gpu_tokens;
+      split(gpu_mem_info, gpu_tokens, ',');
+      max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
+      max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
+#endif
+      if (stop_signal_) break;
+      std::this_thread::sleep_for(
+          std::chrono::milliseconds(sampling_interval_));
+    }
+  })));
+}
+
+void ResourceUsageMonitor::Stop() {
+  if (!is_supported_) {
+    return;
+  }
+  if (check_memory_thd_ == nullptr) {
+    FDINFO << "Memory monitoring hasn't started yet or has stopped!"
+           << std::endl;
+    return;
+  }
+  FDINFO << "Stop monitoring memory!" << std::endl;
+  StopInternal();
+}
+
+void ResourceUsageMonitor::StopInternal() {
+  stop_signal_ = true;
+  if (check_memory_thd_ == nullptr) {
+    return;
+  }
+  if (check_memory_thd_ != nullptr) {
+    check_memory_thd_->join();
+  }
+  check_memory_thd_.reset(nullptr);
+}
+
+std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
+  std::string result = "";
 #if defined(__linux__) || defined(__ANDROID__)
   int iPid = static_cast<int>(getpid());
   std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect cpu memory info only supports Linux and ANDROID.")
 #endif
-  return;
+  return result;
 }
 
-void DumpCurrentGpuMemoryUsage(const std::string& name, int device_id) {
+std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
+  std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
   std::string command = "nvidia-smi --id=" + std::to_string(device_id) +
                         " --query-gpu=index,uuid,name,timestamp,memory.total,"
                         "memory.free,memory.used,utilization.gpu,utilization."
"memory --format=csv,noheader,nounits"; FILE* pp = popen(command.data(), "r"); - if (!pp) return; + if (!pp) return ""; char tmp[1024]; while (fgets(tmp, sizeof(tmp), pp) != NULL) { - std::ofstream write; - write.open(name, std::ios::app); - write << tmp; - write.close(); + result += tmp; } pclose(pp); #else FDASSERT(false, "Currently collect gpu memory info only supports Linux in GPU.") #endif - return; -} - -float GetCpuMemoryUsage(const std::string& name) { - std::ifstream read(name); - std::string line; - float max_cpu_mem = -1; - while (getline(read, line)) { - std::stringstream ss(line); - std::string tmp; - std::vector nums; - while (getline(ss, tmp, ' ')) { - tmp = strip(tmp); - if (tmp.empty()) continue; - nums.push_back(tmp); - } - max_cpu_mem = std::max(max_cpu_mem, stof(nums[3])); - } - return max_cpu_mem / 1024; -} - -float GetGpuMemoryUsage(const std::string& name) { - std::ifstream read(name); - std::string line; - float max_gpu_mem = -1; - while (getline(read, line)) { - std::stringstream ss(line); - std::string tmp; - std::vector nums; - while (getline(ss, tmp, ',')) { - tmp = strip(tmp); - if (tmp.empty()) continue; - nums.push_back(tmp); - } - max_gpu_mem = std::max(max_gpu_mem, stof(nums[6])); - } - return max_gpu_mem; + return result; } } // namespace benchmark diff --git a/fastdeploy/benchmark/utils.h b/fastdeploy/benchmark/utils.h index 12770f365..f81cb29c1 100755 --- a/fastdeploy/benchmark/utils.h +++ b/fastdeploy/benchmark/utils.h @@ -13,23 +13,72 @@ // limitations under the License. #pragma once +#include +#include // NOLINT #include "fastdeploy/utils/utils.h" namespace fastdeploy { namespace benchmark { +/*! @brief ResourceUsageMonitor object used when to collect memory info. + */ +class FASTDEPLOY_DECL ResourceUsageMonitor { + public: + /** \brief Set sampling_interval_ms and gpu_id for ResourceUsageMonitor. + * + * \param[in] sampling_interval_ms How often to collect memory info(ms). + * \param[in] gpu_id Device(gpu) id, default 0. 
+   */
+  explicit ResourceUsageMonitor(int sampling_interval_ms, int gpu_id = 0);
 
-// Record current cpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentCpuMemoryUsage(const std::string& name);
+  ~ResourceUsageMonitor() { StopInternal(); }
 
-// Record current gpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentGpuMemoryUsage(const std::string& name,
-                                               int device_id);
+  /// Start collecting memory info
+  void Start();
+  /// Stop collecting memory info
+  void Stop();
+  /// Get maximum cpu memory usage
+  float GetMaxCpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_cpu_mem_;
+  }
+  /// Get maximum gpu memory usage
+  float GetMaxGpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_mem_;
+  }
+  /// Get maximum gpu utilization
+  float GetMaxGpuUtil() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_util_;
+  }
 
-// Get Max cpu memory usage
-FASTDEPLOY_DECL float GetCpuMemoryUsage(const std::string& name);
+  ResourceUsageMonitor(ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor(ResourceUsageMonitor&&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&&) = delete;
 
-// Get Max gpu memory usage
-FASTDEPLOY_DECL float GetGpuMemoryUsage(const std::string& name);
+ private:
+  void StopInternal();
+  // Get current cpu memory info
+  std::string GetCurrentCpuMemoryInfo();
+  // Get current gpu memory info
+  std::string GetCurrentGpuMemoryInfo(int device_id);
+
+  bool is_supported_ = false;
+  bool stop_signal_ = false;
+  const int sampling_interval_;
+  float max_cpu_mem_ = 0.0f;
+  float max_gpu_mem_ = 0.0f;
+  float max_gpu_util_ = 0.0f;
+  const int gpu_id_ = 0;
+  std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
+};
 
 }  // namespace benchmark
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
old mode 100644
new mode 100755
index 0aa6bbec8..c45dd2fe7
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -198,8 +198,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
 
   // *** The following api are deprecated, will be removed in v1.2.0
   // *** Do not use it anymore
-
-  void SetPaddleMKLDNN(bool pd_mkldnn = true);
+  void SetPaddleMKLDNN(bool pd_mkldnn = true);
   void EnablePaddleToTrt();
   void DeletePaddleBackendPass(const std::string& delete_pass_name);
   void EnablePaddleLogInfo();
diff --git a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
index a6b6f0cc9..de52e6b1a 100755
--- a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
+++ b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
@@ -56,7 +56,7 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   float GetNMSThreshold() const { return nms_threshold_; }
 
   /// Set height and width
-  void SetHeightAndWeight(int height,int width) {
+  void SetHeightAndWeight(int height, int width) {
     height_ = height;
     width_ = width;
   }

From 6a3ac91057203035fba9b722ac8be251b6a323e9 Mon Sep 17 00:00:00 2001
From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com>
Date: Sat, 11 Feb 2023 09:09:53 +0800
Subject: [PATCH 2/4] [Model] Update rkyolo pybind (#1294)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update rkyolo pybind
---
 .../detection/contrib/rknpu2/postprocessor.h  |  4 ++++
 .../detection/contrib/rknpu2/rkyolo_pybind.cc |  4 +++-
 .../detection/contrib/rkyolo/rkyolov5.py      | 21 +++++++++---------
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
index de52e6b1a..7178b13b4 100755
--- a/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
+++ b/fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h
@@ -80,6 +80,10 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
     obj_class_num_ = num;
     prob_box_size_ = obj_class_num_ + 5;
   }
+  /// Get the number of classes
+  int GetClassNum() {
+    return obj_class_num_;
+  }
 
  private:
   std::vector<int> anchors_ = {10, 13, 16, 30, 33, 23, 30, 61, 62,
diff --git a/fastdeploy/vision/detection/contrib/rknpu2/rkyolo_pybind.cc b/fastdeploy/vision/detection/contrib/rknpu2/rkyolo_pybind.cc
index 716464458..5fe70b7fd 100755
--- a/fastdeploy/vision/detection/contrib/rknpu2/rkyolo_pybind.cc
+++ b/fastdeploy/vision/detection/contrib/rknpu2/rkyolo_pybind.cc
@@ -65,7 +65,9 @@ void BindRKYOLO(pybind11::module& m) {
       .def_property("conf_threshold",
                     &vision::detection::RKYOLOPostprocessor::GetConfThreshold,
                     &vision::detection::RKYOLOPostprocessor::SetConfThreshold)
       .def_property("nms_threshold",
                     &vision::detection::RKYOLOPostprocessor::GetNMSThreshold,
-                    &vision::detection::RKYOLOPostprocessor::SetNMSThreshold);
+                    &vision::detection::RKYOLOPostprocessor::SetNMSThreshold)
+      .def_property("class_num",
+                    &vision::detection::RKYOLOPostprocessor::GetClassNum,
+                    &vision::detection::RKYOLOPostprocessor::SetClassNum);
 
   pybind11::class_<...>(m, "RKYOLOV5")
       .def(pybind11::init<...

Date: Sun, 12 Feb 2023 15:13:32 +0800
Subject: [PATCH 3/4] [Other] Update fast_tokenizer version (#1300)

Update fast_tokenizer version
---
 cmake/fast_tokenizer.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/fast_tokenizer.cmake b/cmake/fast_tokenizer.cmake
index 6e183dafe..fb32f8ea1 100644
--- a/cmake/fast_tokenizer.cmake
+++ b/cmake/fast_tokenizer.cmake
@@ -61,7 +61,7 @@ endif(WIN32)
 
 message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")
 set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/")
-set(FASTTOKENIZER_VERSION "1.0.1")
+set(FASTTOKENIZER_VERSION "1.0.2")
 
 # Set download url
 if(WIN32)

From e63f5f369e79deea00895a3f0ee3d24020068574 Mon Sep 17 00:00:00 2001
From: Jack Zhou
Date: Sun, 12 Feb 2023 15:13:39 +0800
Subject: [PATCH 4/4] [Backend] Update paddle inference to 2.4-dev5 (#1302)

update paddle inference to 2.4-dev5
---
 cmake/paddle_inference.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/paddle_inference.cmake b/cmake/paddle_inference.cmake
index efd65394e..47d3d9fcf 100755
--- a/cmake/paddle_inference.cmake
+++ b/cmake/paddle_inference.cmake
@@ -80,7 +80,7 @@ if(PADDLEINFERENCE_DIRECTORY)
   endif()
 else()
   set(PADDLEINFERENCE_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
-  set(PADDLEINFERENCE_VERSION "2.4-dev4")
+  set(PADDLEINFERENCE_VERSION "2.4-dev5")
   if(WIN32)
     if (WITH_GPU)
       set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-gpu-trt-${PADDLEINFERENCE_VERSION}.zip")
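
Usage note on PATCH 1/4: the ResourceUsageMonitor is self-contained, so it can drive
memory collection in any benchmark, not only benchmark_yolov5.cc (invoked there as
./benchmark_yolov5 --model ... --image ... --collect_memory_info --sampling_interval 50).
Below is a minimal sketch of the call pattern, assuming a Linux/Android host and a
FastDeploy build with WITH_GPU for the GPU numbers; MeasureWorkload and the 50 ms
interval are illustrative, not part of the patch.

    #include <iostream>

    #include "fastdeploy/benchmark/utils.h"

    void MeasureWorkload() {
      // Sample PSS (via pmap) and GPU stats (via nvidia-smi) every 50 ms on GPU 0.
      fastdeploy::benchmark::ResourceUsageMonitor monitor(
          /*sampling_interval_ms=*/50, /*gpu_id=*/0);
      monitor.Start();
      // ... run warmup and timed inference here ...
      // Read the maxima before Stop(): Stop() joins and destroys the sampling
      // thread, after which the getters return -1.0f.
      std::cout << "cpu_pss_mb: " << monitor.GetMaxCpuMem() << "MB." << std::endl;
      std::cout << "gpu_pss_mb: " << monitor.GetMaxGpuMem() << "MB." << std::endl;
      std::cout << "gpu_util: " << monitor.GetMaxGpuUtil() << std::endl;
      monitor.Stop();
    }

This read-then-stop ordering matches the patched benchmark_yolov5.cc; on builds
without WITH_GPU the GPU maxima simply stay at their 0.0f initial values.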
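Usage note on PATCH 2/4: GetClassNum pairs with the existing SetClassNum so the new
class_num pybind property can round-trip from Python. A minimal C++ sketch of the
same accessor pair follows; the wrapper function and the 20-class value are
illustrative, not part of the patch.

    #include "fastdeploy/vision/detection/contrib/rknpu2/postprocessor.h"

    void ConfigureForCustomModel(
        fastdeploy::vision::detection::RKYOLOPostprocessor* postprocessor) {
      // SetClassNum also rederives prob_box_size_ = class_num + 5, keeping the
      // decoded box layout consistent with the model head.
      postprocessor->SetClassNum(20);          // e.g. a 20-class custom RKYOLO model
      int num = postprocessor->GetClassNum();  // now reads back 20
      (void)num;
    }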