[Benchmark] Remove pmap and use mem api (#1309)

* add GPL license

* add GPL-3.0 license

* add GPL-3.0 license

* add GPL-3.0 license

* support yolov8

* add pybind for yolov8

* add yolov8 readme

* add cpp benchmark

* add cpu and gpu mem

* public part split

* add runtime mode

* fixed bugs

* add cpu_thread_nums

* deal with comments

* deal with comments

* deal with comments

* rm useless code

* add FASTDEPLOY_DECL

* add FASTDEPLOY_DECL

* fixed for windows

* mv rss to pss

* mv rss to pss

* Update utils.cc

* use thread to collect mem

* Add ResourceUsageMonitor

* rm useless code

* fixed bug

* fixed typo

* update ResourceUsageMonitor

* fixed bug

* fixed bug

* add note for ResourceUsageMonitor

* deal with comments

* add macros

* deal with comments

* deal with comments

* deal with comments

* re-lint

* rm pmap and use mem api

* rm pmap and use mem api

* add mem api

* Add PrintBenchmarkInfo func

* Add PrintBenchmarkInfo func

* Add PrintBenchmarkInfo func

* deal with comments

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
This commit is contained in:
WJJ1995
2023-02-14 15:00:05 +08:00
committed by GitHub
parent abae858f09
commit 2dfda1db85
6 changed files with 74 additions and 51 deletions

9
benchmark/cpp/benchmark_ppyolov8.cc Normal file → Executable file
View File

@@ -12,16 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "flags.h"
#include "option.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
auto im = cv::imread(FLAGS_image);
@@ -31,6 +25,7 @@ int main(int argc, char* argv[]) {
PrintUsage();
return false;
}
PrintBenchmarkInfo();
auto model_file = FLAGS_model + sep + "model.pdmodel";
auto params_file = FLAGS_model + sep + "model.pdiparams";
auto config_file = FLAGS_model + sep + "infer_cfg.yml";

3
benchmark/cpp/benchmark_yolov5.cc Normal file → Executable file
View File

@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "flags.h"
#include "macros.h"
#include "flags.h"
#include "option.h"
int main(int argc, char* argv[]) {
@@ -25,6 +25,7 @@ int main(int argc, char* argv[]) {
PrintUsage();
return false;
}
PrintBenchmarkInfo();
auto model_yolov5 =
fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option);
fastdeploy::vision::DetectionResult res;

View File

@@ -16,6 +16,12 @@
#include "gflags/gflags.h"
// Platform path separator used when joining and splitting model paths.
// `_WIN32` is the macro compilers predefine for Windows targets; `WIN32` is
// only defined by Windows SDK headers or by the build system, so both
// spellings are checked to avoid silently picking '/' on Windows.
#if defined(_WIN32) || defined(WIN32)
const char sep = '\\';
#else
const char sep = '/';
#endif
DEFINE_string(model, "", "Directory of the inference model.");
DEFINE_string(image, "", "Path of the image file.");
DEFINE_string(device, "cpu",
@@ -48,3 +54,35 @@ void PrintUsage() {
std::cout << "Default value of backend: default" << std::endl;
std::cout << "Default value of use_fp16: false" << std::endl;
}
// Print the benchmark configuration to stdout: the model name followed by the
// values of the command-line flags that control the run.
//
// The model name is the last component of FLAGS_model after splitting it on
// the platform path separator `sep`. Flags that only matter in a particular
// mode (include_h2d_d2h, device_id, sampling_interval) are printed only when
// that mode is active.
void PrintBenchmarkInfo() {
  // Get model name
  std::vector<std::string> model_names;
  fastdeploy::benchmark::Split(FLAGS_model, model_names, sep);
  // Indexing the last element of an empty vector is undefined behaviour, so
  // fall back to the raw flag value when the split produced no tokens (for
  // example when FLAGS_model is empty).
  const std::string model_name =
      model_names.empty() ? FLAGS_model : model_names.back();

  // Save benchmark info
  std::stringstream ss;
  ss.precision(3);
  ss << "\n======= Model Info =======\n";
  ss << "model_name: " << model_name << '\n';
  ss << "profile_mode: " << FLAGS_profile_mode << '\n';
  if (FLAGS_profile_mode == "runtime") {
    ss << "include_h2d_d2h: " << FLAGS_include_h2d_d2h << '\n';
  }
  ss << "\n======= Backend Info =======\n";
  ss << "warmup: " << FLAGS_warmup << '\n';
  ss << "repeats: " << FLAGS_repeat << '\n';
  ss << "device: " << FLAGS_device << '\n';
  if (FLAGS_device == "gpu") {
    ss << "device_id: " << FLAGS_device_id << '\n';
  }
  ss << "backend: " << FLAGS_backend << '\n';
  ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << '\n';
  ss << "use_fp16: " << FLAGS_use_fp16 << '\n';
  ss << "collect_memory_info: " << FLAGS_collect_memory_info << '\n';
  if (FLAGS_collect_memory_info) {
    ss << "sampling_interval: " << std::to_string(FLAGS_sampling_interval)
       << "ms" << '\n';
  }
  // Emit everything in one write and flush once at the end.
  std::cout << ss.str() << std::endl;
}

View File

@@ -18,7 +18,6 @@
#define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC) \
{ \
std::cout << "====" << #MODEL_NAME << "====" << std::endl; \
if (!MODEL_NAME.Initialized()) { \
std::cerr << "Failed to initialize." << std::endl; \
return 0; \
@@ -62,8 +61,8 @@
float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \
float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \
float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \
std::cout << "cpu_pss_mb: " << __cpu_mem__ << "MB." << std::endl; \
std::cout << "gpu_pss_mb: " << __gpu_mem__ << "MB." << std::endl; \
std::cout << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl; \
std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \
std::cout << "gpu_util: " << __gpu_util__ << std::endl; \
__resource_moniter__.Stop(); \
} \

View File

@@ -13,8 +13,8 @@
// limitations under the License.
#include <sys/types.h>
#if defined(__linux__) || defined(__ANDROID__)
#include <unistd.h>
#ifdef __linux__
#include <sys/resource.h>
#endif
#include <cmath>
@@ -23,8 +23,7 @@
namespace fastdeploy {
namespace benchmark {
// Remove the ch characters at both ends of str
static std::string strip(const std::string& str, char ch = ' ') {
std::string Strip(const std::string& str, char ch) {
int i = 0;
while (str[i] == ch) {
i++;
@@ -36,9 +35,8 @@ static std::string strip(const std::string& str, char ch = ' ') {
return str.substr(i, j + 1 - i);
}
// Split string
static void split(const std::string& s, std::vector<std::string>& tokens,
char delim = ' ') {
void Split(const std::string& s, std::vector<std::string>& tokens,
char delim) {
tokens.clear();
size_t lastPos = s.find_first_not_of(delim, 0);
size_t pos = s.find(delim, lastPos);
@@ -54,7 +52,7 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
: is_supported_(false),
sampling_interval_(sampling_interval_ms),
gpu_id_(gpu_id) {
#if defined(__linux__) || defined(__ANDROID__)
#ifdef __linux__
is_supported_ = true;
#else
is_supported_ = false;
@@ -67,7 +65,9 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
}
void ResourceUsageMonitor::Start() {
if (!is_supported_) return;
if (!is_supported_) {
return;
}
if (check_memory_thd_ != nullptr) {
FDINFO << "Memory monitoring has already started!" << std::endl;
return;
@@ -77,20 +77,24 @@ void ResourceUsageMonitor::Start() {
check_memory_thd_.reset(new std::thread(([this]() {
// Note we retrieve the memory usage at the very beginning of the thread.
while (true) {
std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
// get max_cpu_mem
std::vector<std::string> cpu_tokens;
split(cpu_mem_info, cpu_tokens, ' ');
max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
#ifdef __linux__
rusage res;
if (getrusage(RUSAGE_SELF, &res) == 0) {
max_cpu_mem_ =
std::max(max_cpu_mem_, static_cast<float>(res.ru_maxrss / 1024.0));
}
#endif
#if defined(WITH_GPU)
std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
// get max_gpu_mem and max_gpu_util
std::vector<std::string> gpu_tokens;
split(gpu_mem_info, gpu_tokens, ',');
Split(gpu_mem_info, gpu_tokens, ',');
max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
#endif
if (stop_signal_) break;
if (stop_signal_) {
break;
}
std::this_thread::sleep_for(
std::chrono::milliseconds(sampling_interval_));
}
@@ -121,26 +125,6 @@ void ResourceUsageMonitor::StopInternal() {
check_memory_thd_.reset(nullptr);
}
// Run `pmap -x <pid> | grep total` for the current process and return the
// text read from that command's output. Returns an empty string when the
// pipe cannot be opened. On platforms other than Linux/Android the function
// triggers FDASSERT instead.
std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
  std::string output;
#if defined(__linux__) || defined(__ANDROID__)
  // Build the shell command for this process id.
  const std::string pid_text = std::to_string(static_cast<int>(getpid()));
  const std::string pmap_cmd = "pmap -x " + pid_text + " | grep total";

  FILE* stream = popen(pmap_cmd.c_str(), "r");
  if (stream == nullptr) {
    return "";
  }

  // Accumulate everything the command prints, one chunk at a time.
  char chunk[1024];
  while (fgets(chunk, sizeof(chunk), stream)) {
    output.append(chunk);
  }
  pclose(stream);
#else
  FDASSERT(false,
           "Currently collect cpu memory info only supports Linux and ANDROID.")
#endif
  return output;
}
std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
std::string result = "";
#if defined(__linux__) && defined(WITH_GPU)

View File

@@ -65,20 +65,26 @@ class FASTDEPLOY_DECL ResourceUsageMonitor {
private:
void StopInternal();
// Get current cpu memory info
std::string GetCurrentCpuMemoryInfo();
// Get current gpu memory info
std::string GetCurrentGpuMemoryInfo(int device_id);
bool is_supported_ = false;
bool stop_signal_ = false;
const int sampling_interval_;
float max_cpu_mem_ = 0.0f;
float max_gpu_mem_ = 0.0f;
float max_cpu_mem_ = 0.0f; // MB
float max_gpu_mem_ = 0.0f; // MB
float max_gpu_util_ = 0.0f;
const int gpu_id_ = 0;
std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
};
// Return a copy of `str` with the `ch` characters removed from both ends.
// `ch` defaults to a space.
FASTDEPLOY_DECL std::string Strip(const std::string& str, char ch = ' ');
// Split string `s` on the `delim` character and write the pieces to `tokens`.
// `tokens` is cleared before it is filled; `delim` defaults to a space.
FASTDEPLOY_DECL void Split(const std::string& s,
std::vector<std::string>& tokens,
char delim = ' ');
} // namespace benchmark
} // namespace fastdeploy