mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[Benchmark] Remove pmap and use mem api (#1309)
* add GPL lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint * rm pmap and use mem api * rm pmap and use mem api * add mem api * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * deal with comments --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
This commit is contained in:
9
benchmark/cpp/benchmark_ppyolov8.cc
Normal file → Executable file
9
benchmark/cpp/benchmark_ppyolov8.cc
Normal file → Executable file
@@ -12,16 +12,10 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "flags.h"
|
||||
#include "macros.h"
|
||||
#include "flags.h"
|
||||
#include "option.h"
|
||||
|
||||
#ifdef WIN32
|
||||
const char sep = '\\';
|
||||
#else
|
||||
const char sep = '/';
|
||||
#endif
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
google::ParseCommandLineFlags(&argc, &argv, true);
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
@@ -31,6 +25,7 @@ int main(int argc, char* argv[]) {
|
||||
PrintUsage();
|
||||
return false;
|
||||
}
|
||||
PrintBenchmarkInfo();
|
||||
auto model_file = FLAGS_model + sep + "model.pdmodel";
|
||||
auto params_file = FLAGS_model + sep + "model.pdiparams";
|
||||
auto config_file = FLAGS_model + sep + "infer_cfg.yml";
|
||||
|
3
benchmark/cpp/benchmark_yolov5.cc
Normal file → Executable file
3
benchmark/cpp/benchmark_yolov5.cc
Normal file → Executable file
@@ -12,8 +12,8 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "flags.h"
|
||||
#include "macros.h"
|
||||
#include "flags.h"
|
||||
#include "option.h"
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
@@ -25,6 +25,7 @@ int main(int argc, char* argv[]) {
|
||||
PrintUsage();
|
||||
return false;
|
||||
}
|
||||
PrintBenchmarkInfo();
|
||||
auto model_yolov5 =
|
||||
fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option);
|
||||
fastdeploy::vision::DetectionResult res;
|
||||
|
@@ -16,6 +16,12 @@
|
||||
|
||||
#include "gflags/gflags.h"
|
||||
|
||||
// Platform-specific path separator used when joining and splitting model
// paths. _WIN32 is predefined by Windows compilers; WIN32 is still honoured
// for builds that only define it through the build system or SDK headers.
#if defined(_WIN32) || defined(WIN32)
const char sep = '\\';
#else
const char sep = '/';
#endif
|
||||
|
||||
DEFINE_string(model, "", "Directory of the inference model.");
|
||||
DEFINE_string(image, "", "Path of the image file.");
|
||||
DEFINE_string(device, "cpu",
|
||||
@@ -48,3 +54,35 @@ void PrintUsage() {
|
||||
std::cout << "Default value of backend: default" << std::endl;
|
||||
std::cout << "Default value of use_fp16: false" << std::endl;
|
||||
}
|
||||
|
||||
void PrintBenchmarkInfo() {
|
||||
// Get model name
|
||||
std::vector<std::string> model_names;
|
||||
fastdeploy::benchmark::Split(FLAGS_model, model_names, sep);
|
||||
// Save benchmark info
|
||||
std::stringstream ss;
|
||||
ss.precision(3);
|
||||
ss << "\n======= Model Info =======\n";
|
||||
ss << "model_name: " << model_names[model_names.size() - 1] << std::endl;
|
||||
ss << "profile_mode: " << FLAGS_profile_mode << std::endl;
|
||||
if (FLAGS_profile_mode == "runtime") {
|
||||
ss << "include_h2d_d2h: " << FLAGS_include_h2d_d2h << std::endl;
|
||||
}
|
||||
ss << "\n======= Backend Info =======\n";
|
||||
ss << "warmup: " << FLAGS_warmup << std::endl;
|
||||
ss << "repeats: " << FLAGS_repeat << std::endl;
|
||||
ss << "device: " << FLAGS_device << std::endl;
|
||||
if (FLAGS_device == "gpu") {
|
||||
ss << "device_id: " << FLAGS_device_id << std::endl;
|
||||
}
|
||||
ss << "backend: " << FLAGS_backend << std::endl;
|
||||
ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl;
|
||||
ss << "use_fp16: " << FLAGS_use_fp16 << std::endl;
|
||||
ss << "collect_memory_info: " << FLAGS_collect_memory_info << std::endl;
|
||||
if (FLAGS_collect_memory_info) {
|
||||
ss << "sampling_interval: " << std::to_string(FLAGS_sampling_interval)
|
||||
<< "ms" << std::endl;
|
||||
}
|
||||
std::cout << ss.str() << std::endl;
|
||||
return;
|
||||
}
|
||||
|
@@ -18,7 +18,6 @@
|
||||
|
||||
#define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC) \
|
||||
{ \
|
||||
std::cout << "====" << #MODEL_NAME << "====" << std::endl; \
|
||||
if (!MODEL_NAME.Initialized()) { \
|
||||
std::cerr << "Failed to initialize." << std::endl; \
|
||||
return 0; \
|
||||
@@ -62,8 +61,8 @@
|
||||
float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \
|
||||
float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \
|
||||
float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \
|
||||
std::cout << "cpu_pss_mb: " << __cpu_mem__ << "MB." << std::endl; \
|
||||
std::cout << "gpu_pss_mb: " << __gpu_mem__ << "MB." << std::endl; \
|
||||
std::cout << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl; \
|
||||
std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \
|
||||
std::cout << "gpu_util: " << __gpu_util__ << std::endl; \
|
||||
__resource_moniter__.Stop(); \
|
||||
} \
|
||||
|
@@ -13,8 +13,8 @@
|
||||
// limitations under the License.
|
||||
|
||||
#include <sys/types.h>
|
||||
#if defined(__linux__) || defined(__ANDROID__)
|
||||
#include <unistd.h>
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
#include <cmath>
|
||||
|
||||
@@ -23,8 +23,7 @@
|
||||
namespace fastdeploy {
|
||||
namespace benchmark {
|
||||
|
||||
// Remove the ch characters at both ends of str
|
||||
static std::string strip(const std::string& str, char ch = ' ') {
|
||||
std::string Strip(const std::string& str, char ch) {
|
||||
int i = 0;
|
||||
while (str[i] == ch) {
|
||||
i++;
|
||||
@@ -36,9 +35,8 @@ static std::string strip(const std::string& str, char ch = ' ') {
|
||||
return str.substr(i, j + 1 - i);
|
||||
}
|
||||
|
||||
// Split string
|
||||
static void split(const std::string& s, std::vector<std::string>& tokens,
|
||||
char delim = ' ') {
|
||||
void Split(const std::string& s, std::vector<std::string>& tokens,
|
||||
char delim) {
|
||||
tokens.clear();
|
||||
size_t lastPos = s.find_first_not_of(delim, 0);
|
||||
size_t pos = s.find(delim, lastPos);
|
||||
@@ -54,7 +52,7 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
|
||||
: is_supported_(false),
|
||||
sampling_interval_(sampling_interval_ms),
|
||||
gpu_id_(gpu_id) {
|
||||
#if defined(__linux__) || defined(__ANDROID__)
|
||||
#ifdef __linux__
|
||||
is_supported_ = true;
|
||||
#else
|
||||
is_supported_ = false;
|
||||
@@ -67,7 +65,9 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
|
||||
}
|
||||
|
||||
void ResourceUsageMonitor::Start() {
|
||||
if (!is_supported_) return;
|
||||
if (!is_supported_) {
|
||||
return;
|
||||
}
|
||||
if (check_memory_thd_ != nullptr) {
|
||||
FDINFO << "Memory monitoring has already started!" << std::endl;
|
||||
return;
|
||||
@@ -77,20 +77,24 @@ void ResourceUsageMonitor::Start() {
|
||||
check_memory_thd_.reset(new std::thread(([this]() {
|
||||
// Note we retrieve the memory usage at the very beginning of the thread.
|
||||
while (true) {
|
||||
std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
|
||||
// get max_cpu_mem
|
||||
std::vector<std::string> cpu_tokens;
|
||||
split(cpu_mem_info, cpu_tokens, ' ');
|
||||
max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
|
||||
#ifdef __linux__
|
||||
rusage res;
|
||||
if (getrusage(RUSAGE_SELF, &res) == 0) {
|
||||
max_cpu_mem_ =
|
||||
std::max(max_cpu_mem_, static_cast<float>(res.ru_maxrss / 1024.0));
|
||||
}
|
||||
#endif
|
||||
#if defined(WITH_GPU)
|
||||
std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
|
||||
// get max_gpu_mem and max_gpu_util
|
||||
std::vector<std::string> gpu_tokens;
|
||||
split(gpu_mem_info, gpu_tokens, ',');
|
||||
Split(gpu_mem_info, gpu_tokens, ',');
|
||||
max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
|
||||
max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
|
||||
#endif
|
||||
if (stop_signal_) break;
|
||||
if (stop_signal_) {
|
||||
break;
|
||||
}
|
||||
std::this_thread::sleep_for(
|
||||
std::chrono::milliseconds(sampling_interval_));
|
||||
}
|
||||
@@ -121,26 +125,6 @@ void ResourceUsageMonitor::StopInternal() {
|
||||
check_memory_thd_.reset(nullptr);
|
||||
}
|
||||
|
||||
std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
|
||||
std::string result = "";
|
||||
#if defined(__linux__) || defined(__ANDROID__)
|
||||
int iPid = static_cast<int>(getpid());
|
||||
std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
|
||||
FILE* pp = popen(command.data(), "r");
|
||||
if (!pp) return "";
|
||||
char tmp[1024];
|
||||
|
||||
while (fgets(tmp, sizeof(tmp), pp) != NULL) {
|
||||
result += tmp;
|
||||
}
|
||||
pclose(pp);
|
||||
#else
|
||||
FDASSERT(false,
|
||||
"Currently collect cpu memory info only supports Linux and ANDROID.")
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
|
||||
std::string result = "";
|
||||
#if defined(__linux__) && defined(WITH_GPU)
|
||||
|
@@ -65,20 +65,26 @@ class FASTDEPLOY_DECL ResourceUsageMonitor {
|
||||
|
||||
private:
|
||||
void StopInternal();
|
||||
// Get current cpu memory info
|
||||
std::string GetCurrentCpuMemoryInfo();
|
||||
// Get current gpu memory info
|
||||
std::string GetCurrentGpuMemoryInfo(int device_id);
|
||||
|
||||
bool is_supported_ = false;
|
||||
bool stop_signal_ = false;
|
||||
const int sampling_interval_;
|
||||
float max_cpu_mem_ = 0.0f;
|
||||
float max_gpu_mem_ = 0.0f;
|
||||
float max_cpu_mem_ = 0.0f; // MB
|
||||
float max_gpu_mem_ = 0.0f; // MB
|
||||
float max_gpu_util_ = 0.0f;
|
||||
const int gpu_id_ = 0;
|
||||
std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
|
||||
};
|
||||
|
||||
// Return a copy of str with the leading and trailing ch characters removed
|
||||
FASTDEPLOY_DECL std::string Strip(const std::string& str, char ch = ' ');
|
||||
|
||||
// Split s on delim and store the pieces in tokens (any previous contents of
// tokens are discarded)
|
||||
FASTDEPLOY_DECL void Split(const std::string& s,
|
||||
std::vector<std::string>& tokens,
|
||||
char delim = ' ');
|
||||
|
||||
} // namespace benchmark
|
||||
} // namespace fastdeploy
|
||||
|
Reference in New Issue
Block a user