Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Benchmark] Add ResourceUsageMonitor to collect memory info (#1269)
* add GPL license
* add GPL-3.0 license
* add GPL-3.0 license
* add GPL-3.0 license
* support yolov8
* add pybind for yolov8
* add yolov8 readme
* add cpp benchmark
* add cpu and gpu mem
* public part split
* add runtime mode
* fixed bugs
* add cpu_thread_nums
* deal with comments
* deal with comments
* deal with comments
* rm useless code
* add FASTDEPLOY_DECL
* add FASTDEPLOY_DECL
* fixed for windows
* mv rss to pss
* mv rss to pss
* Update utils.cc
* use thread to collect mem
* Add ResourceUsageMonitor
* rm useless code
* fixed bug
* fixed typo
* update ResourceUsageMonitor
* fixed bug
* fixed bug
* add note for ResourceUsageMonitor
* deal with comments

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
benchmark/cpp/benchmark_yolov5.cc (53 changes, Normal file → Executable file)
@@ -17,8 +17,7 @@
 #include "flags.h"
 
 bool RunModel(std::string model_file, std::string image_file, size_t warmup,
-              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
-              std::string gpu_mem_file_name) {
+              size_t repeats, size_t sampling_interval) {
   // Initialization
   auto option = fastdeploy::RuntimeOption();
   if (!CreateRuntimeOption(&option)) {
@@ -34,6 +33,12 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
     return false;
   }
   auto im = cv::imread(image_file);
+  // For collect memory info
+  fastdeploy::benchmark::ResourceUsageMonitor resource_moniter(
+      sampling_interval, FLAGS_device_id);
+  if (FLAGS_collect_memory_info) {
+    resource_moniter.Start();
+  }
   // For Runtime
   if (FLAGS_profile_mode == "runtime") {
     fastdeploy::vision::DetectionResult res;
@@ -57,35 +62,34 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
       return false;
     }
   }
-  std::vector<float> end2end_statis;
   // Step2: repeat for repeats times
-  std::cout << "Counting time..." << std::endl;
+  fastdeploy::TimeCounter tc;
   std::cout << "Repeat " << repeats << " times..." << std::endl;
   fastdeploy::vision::DetectionResult res;
-  for (int i = 0; i < repeats; i++) {
-    if (FLAGS_collect_memory_info && i % dump_period == 0) {
-      fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
-#if defined(WITH_GPU)
-      fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
-                                                       FLAGS_device_id);
-#endif
-    }
-    fastdeploy::TimeCounter tc;
-    tc.Start();
+  tc.Start();
+  for (int i = 0; i < repeats; i++) {
     if (!model.Predict(im, &res)) {
       std::cerr << "Failed to predict." << std::endl;
       return false;
     }
-    tc.End();
-    end2end_statis.push_back(tc.Duration() * 1000);
   }
-  float end2end = std::accumulate(end2end_statis.end() - repeats,
-                                  end2end_statis.end(), 0.f) /
-                  repeats;
+  tc.End();
+  double end2end = tc.Duration() / repeats * 1000;
   std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
   auto vis_im = fastdeploy::vision::VisDetection(im, res);
   cv::imwrite("vis_result.jpg", vis_im);
   std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
   }
+  if (FLAGS_collect_memory_info) {
+    float cpu_mem = resource_moniter.GetMaxCpuMem();
+    float gpu_mem = resource_moniter.GetMaxGpuMem();
+    float gpu_util = resource_moniter.GetMaxGpuUtil();
+    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+    std::cout << "gpu_util: " << gpu_util << std::endl;
+    resource_moniter.Stop();
+  }
 
   return true;
 }
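Note the methodology change in this hunk: instead of pushing per-iteration durations into end2end_statis and averaging with std::accumulate, the new code starts a single TimeCounter before the loop and divides the total duration by the repeat count (e.g. 1000 repeats measured at 12.5 s total report 12.5 / 1000 * 1000 = 12.5 ms End2End). It also moves memory sampling off the hot loop: the periodic dump_period file dumps are gone, replaced by the monitor's background thread.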
@@ -94,21 +98,10 @@ int main(int argc, char* argv[]) {
   google::ParseCommandLineFlags(&argc, &argv, true);
   int repeats = FLAGS_repeat;
   int warmup = FLAGS_warmup;
-  int dump_period = FLAGS_dump_period;
-  std::string cpu_mem_file_name = "result_cpu.txt";
-  std::string gpu_mem_file_name = "result_gpu.txt";
+  int sampling_interval = FLAGS_sampling_interval;
   // Run model
-  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
-               cpu_mem_file_name, gpu_mem_file_name) != true) {
+  if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) {
     exit(1);
   }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-#if defined(WITH_GPU)
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
-#endif
-  }
   return 0;
 }
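Taken together, the benchmark-side usage the diff introduces boils down to the following sketch (condensed from the changes above; flag parsing, warmup, and error handling omitted, names as in the diff):

    fastdeploy::benchmark::ResourceUsageMonitor resource_moniter(
        FLAGS_sampling_interval, FLAGS_device_id);
    if (FLAGS_collect_memory_info) resource_moniter.Start();
    // ... warmup + timed model.Predict() loop ...
    if (FLAGS_collect_memory_info) {
      std::cout << "cpu_pss_mb: " << resource_moniter.GetMaxCpuMem() << "MB." << std::endl;
      std::cout << "gpu_pss_mb: " << resource_moniter.GetMaxGpuMem() << "MB." << std::endl;
      std::cout << "gpu_util: " << resource_moniter.GetMaxGpuUtil() << std::endl;
      resource_moniter.Stop();
    }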
benchmark/cpp/flags.h

@@ -37,7 +37,7 @@ DEFINE_bool(
             "and 'lite' backend");
 DEFINE_bool(
     collect_memory_info, false, "Whether to collect memory info");
-DEFINE_int32(dump_period, 100, "How often to collect memory info.");
+DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");
 
 void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path --device "
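Note the semantics of the flag swap: dump_period was a dump-every-N-iterations counter, while sampling_interval is a period in milliseconds for the monitor thread. A memory-profiling run might therefore be invoked roughly as follows (hypothetical binary name and model/image paths):

    ./benchmark_yolov5 --model yolov5s.onnx --image test.jpg --device gpu --collect_memory_info=true --sampling_interval 50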
fastdeploy/benchmark/utils.cc

@@ -36,86 +36,131 @@ static std::string strip(const std::string& str, char ch = ' ') {
   return str.substr(i, j + 1 - i);
 }
 
-void DumpCurrentCpuMemoryUsage(const std::string& name) {
+// Split string
+static void split(const std::string& s, std::vector<std::string>& tokens,
+                  char delim = ' ') {
+  tokens.clear();
+  size_t lastPos = s.find_first_not_of(delim, 0);
+  size_t pos = s.find(delim, lastPos);
+  while (lastPos != std::string::npos) {
+    tokens.emplace_back(s.substr(lastPos, pos - lastPos));
+    lastPos = s.find_first_not_of(delim, pos);
+    pos = s.find(delim, lastPos);
+  }
+  return;
+}
+
+ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
+    : is_supported_(false),
+      sampling_interval_(sampling_interval_ms),
+      gpu_id_(gpu_id) {
+#if defined(__linux__) || defined(__ANDROID__)
+  is_supported_ = true;
+#else
+  is_supported_ = false;
+#endif
+  if (!is_supported_) {
+    FDASSERT(false,
+             "Currently ResourceUsageMonitor only supports Linux and ANDROID.")
+    return;
+  }
+}
+
+void ResourceUsageMonitor::Start() {
+  if (!is_supported_) return;
+  if (check_memory_thd_ != nullptr) {
+    FDINFO << "Memory monitoring has already started!" << std::endl;
+    return;
+  }
+  FDINFO << "Start monitoring memory!" << std::endl;
+  stop_signal_ = false;
+  check_memory_thd_.reset(new std::thread(([this]() {
+    // Note we retrieve the memory usage at the very beginning of the thread.
+    while (true) {
+      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
+      // get max_cpu_mem
+      std::vector<std::string> cpu_tokens;
+      split(cpu_mem_info, cpu_tokens, ' ');
+      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#if defined(WITH_GPU)
+      std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
+      // get max_gpu_mem and max_gpu_util
+      std::vector<std::string> gpu_tokens;
+      split(gpu_mem_info, gpu_tokens, ',');
+      max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
+      max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
+#endif
+      if (stop_signal_) break;
+      std::this_thread::sleep_for(
+          std::chrono::milliseconds(sampling_interval_));
+    }
+  })));
+}
+
+void ResourceUsageMonitor::Stop() {
+  if (!is_supported_) {
+    return;
+  }
+  if (check_memory_thd_ == nullptr) {
+    FDINFO << "Memory monitoring hasn't started yet or has stopped!"
+           << std::endl;
+    return;
+  }
+  FDINFO << "Stop monitoring memory!" << std::endl;
+  StopInternal();
+}
+
+void ResourceUsageMonitor::StopInternal() {
+  stop_signal_ = true;
+  if (check_memory_thd_ == nullptr) {
+    return;
+  }
+  if (check_memory_thd_ != nullptr) {
+    check_memory_thd_->join();
+  }
+  check_memory_thd_.reset(nullptr);
+}
+
+std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
+  std::string result = "";
 #if defined(__linux__) || defined(__ANDROID__)
   int iPid = static_cast<int>(getpid());
   std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
 
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect cpu memory info only supports Linux and ANDROID.")
 #endif
-  return;
+  return result;
 }
 
-void DumpCurrentGpuMemoryUsage(const std::string& name, int device_id) {
+std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
+  std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
   std::string command = "nvidia-smi --id=" + std::to_string(device_id) +
                         " --query-gpu=index,uuid,name,timestamp,memory.total,"
                         "memory.free,memory.used,utilization.gpu,utilization."
                         "memory --format=csv,noheader,nounits";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
 
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect gpu memory info only supports Linux in GPU.")
 #endif
-  return;
-}
-
-float GetCpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_cpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ' ')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_cpu_mem = std::max(max_cpu_mem, stof(nums[3]));
-  }
-  return max_cpu_mem / 1024;
-}
-
-float GetGpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_gpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ',')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_gpu_mem = std::max(max_gpu_mem, stof(nums[6]));
-  }
-  return max_gpu_mem;
+  return result;
 }
 
 } // namespace benchmark
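The two GetCurrent*MemoryInfo() helpers shell out and parse text, so the token indices hinge on fixed output layouts: on typical procps pmap, the `total` row of `pmap -x` reads `total kB <Kbytes> <RSS> <Dirty>`, so cpu_tokens[3] is RSS in KiB (hence the division by 1024 to get MB), and the comma-separated nvidia-smi query above puts memory.used at index 6 and utilization.gpu at index 7. A minimal standalone sketch of the CPU side (assuming Linux with pmap installed; illustration only, not FastDeploy API):

    #include <unistd.h>   // getpid
    #include <cstdio>     // popen, fgets, pclose
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
      // Same command the monitor runs, against our own pid.
      std::string cmd = "pmap -x " + std::to_string(getpid()) + " | grep total";
      FILE* pp = popen(cmd.c_str(), "r");
      if (!pp) return 1;
      char buf[1024];
      std::string line;
      while (fgets(buf, sizeof(buf), pp) != nullptr) line += buf;
      pclose(pp);
      // Whitespace-split; token 3 is the RSS column of the `total` row.
      std::istringstream ss(line);
      std::vector<std::string> tokens;
      for (std::string t; ss >> t;) tokens.push_back(t);
      if (tokens.size() > 3)
        std::cout << "rss_mb: " << std::stof(tokens[3]) / 1024 << std::endl;
      return 0;
    }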
fastdeploy/benchmark/utils.h

@@ -13,23 +13,72 @@
 // limitations under the License.
 #pragma once
 
+#include <memory>
+#include <thread>  // NOLINT
 #include "fastdeploy/utils/utils.h"
 
 namespace fastdeploy {
 namespace benchmark {
+/*! @brief ResourceUsageMonitor object used when to collect memory info.
+ */
+class FASTDEPLOY_DECL ResourceUsageMonitor {
+ public:
+  /** \brief Set sampling_interval_ms and gpu_id for ResourceUsageMonitor.
+   *
+   * \param[in] sampling_interval_ms How often to collect memory info(ms).
+   * \param[in] gpu_id Device(gpu) id, default 0.
+   */
+  explicit ResourceUsageMonitor(int sampling_interval_ms, int gpu_id = 0);
 
-// Record current cpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentCpuMemoryUsage(const std::string& name);
+  ~ResourceUsageMonitor() { StopInternal(); }
 
-// Record current gpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentGpuMemoryUsage(const std::string& name,
-                                               int device_id);
+  /// Start memory info collect
+  void Start();
+  /// Stop memory info collect
+  void Stop();
+  /// Get maximum cpu memory usage
+  float GetMaxCpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_cpu_mem_;
+  }
+  /// Get maximum gpu memory usage
+  float GetMaxGpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_mem_;
+  }
+  /// Get maximum gpu util
+  float GetMaxGpuUtil() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_util_;
+  }
 
-// Get Max cpu memory usage
-FASTDEPLOY_DECL float GetCpuMemoryUsage(const std::string& name);
+  ResourceUsageMonitor(ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor(ResourceUsageMonitor&&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&&) = delete;
 
-// Get Max gpu memory usage
-FASTDEPLOY_DECL float GetGpuMemoryUsage(const std::string& name);
+ private:
+  void StopInternal();
+  // Get current cpu memory info
+  std::string GetCurrentCpuMemoryInfo();
+  // Get current gpu memory info
+  std::string GetCurrentGpuMemoryInfo(int device_id);
+
+  bool is_supported_ = false;
+  bool stop_signal_ = false;
+  const int sampling_interval_;
+  float max_cpu_mem_ = 0.0f;
+  float max_gpu_mem_ = 0.0f;
+  float max_gpu_util_ = 0.0f;
+  const int gpu_id_ = 0;
+  std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
+};
 
 } // namespace benchmark
 } // namespace fastdeploy
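The header spells out the lifecycle contract the implementation relies on: the getters return -1.0f unless Start() has created the sampling thread, copy and move are deleted because the thread lambda captures this, and the destructor calls StopInternal() so the thread is always joined. A generic sketch of that lifecycle, as an illustration rather than the FastDeploy class (it swaps the plain bool stop flag above for std::atomic<bool>):

    #include <atomic>
    #include <chrono>
    #include <iostream>
    #include <memory>
    #include <thread>

    // Start() spawns one background sampling thread; Stop() (also called from
    // the destructor) signals it and joins, mirroring ResourceUsageMonitor.
    class Sampler {
     public:
      explicit Sampler(int interval_ms) : interval_ms_(interval_ms) {}
      ~Sampler() { Stop(); }
      void Start() {
        if (thd_) return;  // already running
        stop_ = false;
        thd_.reset(new std::thread([this]() {
          while (!stop_) {
            // ... sample here and fold into a running max ...
            std::this_thread::sleep_for(
                std::chrono::milliseconds(interval_ms_));
          }
        }));
      }
      void Stop() {
        stop_ = true;
        if (thd_) {
          thd_->join();
          thd_.reset();
        }
      }
     private:
      const int interval_ms_;
      std::atomic<bool> stop_{false};
      std::unique_ptr<std::thread> thd_;
    };

    int main() {
      Sampler s(50);        // sample every 50 ms
      s.Start();
      std::this_thread::sleep_for(std::chrono::milliseconds(200));
      s.Stop();
      std::cout << "done" << std::endl;
      return 0;
    }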
fastdeploy/runtime/runtime_option.h (1 change, Normal file → Executable file)
@@ -198,7 +198,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
 
   // *** The belowing api are deprecated, will be removed in v1.2.0
   // *** Do not use it anymore
 
   void SetPaddleMKLDNN(bool pd_mkldnn = true);
   void EnablePaddleToTrt();
   void DeletePaddleBackendPass(const std::string& delete_pass_name);
@@ -56,7 +56,7 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   float GetNMSThreshold() const { return nms_threshold_; }
 
   /// Set height and weight
-  void SetHeightAndWeight(int height,int width) {
+  void SetHeightAndWeight(int height, int width) {
     height_ = height;
     width_ = width;
   }