[Benchmark] Add run_benchmark_cpu.sh (#1465)

* add GPL license

* add GPL-3.0 license

* add GPL-3.0 license

* add GPL-3.0 license

* support yolov8

* add pybind for yolov8

* add yolov8 readme

* add cpp benchmark

* add cpu and gpu mem

* public part split

* add runtime mode

* fixed bugs

* add cpu_thread_nums

* deal with comments

* deal with comments

* deal with comments

* rm useless code

* add FASTDEPLOY_DECL

* add FASTDEPLOY_DECL

* fixed for windows

* mv rss to pss

* mv rss to pss

* Update utils.cc

* use thread to collect mem

* Add ResourceUsageMonitor

* rm useless code

* fixed bug

* fixed typo

* update ResourceUsageMonitor

* fixed bug

* fixed bug

* add note for ResourceUsageMonitor

* deal with comments

* add macros

* deal with comments

* deal with comments

* deal with comments

* re-lint

* rm pmap and use mem api

* rm pmap and use mem api

* add mem api

* Add PrintBenchmarkInfo func

* Add PrintBenchmarkInfo func

* Add PrintBenchmarkInfo func

* deal with comments

* fixed enable_paddle_to_trt

* add log for paddle_trt

* support ppcls benchmark

* use new trt option api

* update benchmark info

* simplify benchmark.cc

* simplify benchmark.cc

* deal with comments

* Add ppseg && ppocr benchmark

* add OCR rec img

* add ocr benchmark

* fixed trt shape

* add trt shape

* resolve conflict

* add ENABLE_BENCHMARK define

* Add ClassifyDiff

* Add Resize for ClassifyResult

* deal with comments

* add convert info script

* resolve conflict

* Add SaveBenchmarkResult func

* fixed bug

* fixed bug

* fixed bug

* add config.txt for option

* fixed bug

* fixed bug

* fixed bug

* add benchmark.sh

* mv thread_nums from 8 to 1

* deal with comments

* deal with comments

* fixed readme

* deal with comments

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
Author: WJJ1995
Date: 2023-03-02 19:11:21 +08:00
Committed by: GitHub
Parent: a157da17a4
Commit: 99b1bc5d6e
17 changed files with 432 additions and 353 deletions

@@ -22,15 +22,18 @@
     std::cerr << "Failed to initialize." << std::endl; \
     return 0; \
   } \
-  auto __im__ = cv::imread(FLAGS_image); \
+  std::unordered_map<std::string, std::string> __config_info__; \
+  fastdeploy::benchmark::ResultManager::LoadBenchmarkConfig( \
+      FLAGS_config_path, &__config_info__); \
   std::stringstream __ss__; \
   __ss__.precision(6); \
   fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__( \
-      FLAGS_sampling_interval, FLAGS_device_id); \
-  if (FLAGS_collect_memory_info) { \
+      std::stoi(__config_info__["sampling_interval"]), \
+      std::stoi(__config_info__["device_id"])); \
+  if (__config_info__["collect_memory_info"] == "true") { \
     __resource_moniter__.Start(); \
   } \
-  if (FLAGS_profile_mode == "runtime") { \
+  if (__config_info__["profile_mode"] == "runtime") { \
     if (!BENCHMARK_FUNC) { \
       std::cerr << "Failed to predict." << std::endl; \
       return 0; \
@@ -39,29 +42,35 @@
     std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
     __ss__ << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
   } else { \
-    std::cout << "Warmup " << FLAGS_warmup << " times..." << std::endl; \
-    for (int __i__ = 0; __i__ < FLAGS_warmup; __i__++) { \
+    std::cout << "Warmup " \
+              << __config_info__["warmup"] \
+              << " times..." << std::endl; \
+    int __warmup__ = std::stoi(__config_info__["warmup"]); \
+    for (int __i__ = 0; __i__ < __warmup__; __i__++) { \
       if (!BENCHMARK_FUNC) { \
         std::cerr << "Failed to predict." << std::endl; \
         return 0; \
       } \
     } \
     std::cout << "Counting time..." << std::endl; \
-    std::cout << "Repeat " << FLAGS_repeat << " times..." << std::endl; \
+    std::cout << "Repeat " \
+              << __config_info__["repeat"] \
+              << " times..." << std::endl; \
     fastdeploy::TimeCounter __tc__; \
     __tc__.Start(); \
-    for (int __i__ = 0; __i__ < FLAGS_repeat; __i__++) { \
+    int __repeat__ = std::stoi(__config_info__["repeat"]); \
+    for (int __i__ = 0; __i__ < __repeat__; __i__++) { \
       if (!BENCHMARK_FUNC) { \
         std::cerr << "Failed to predict." << std::endl; \
        return 0; \
       } \
     } \
     __tc__.End(); \
-    double __end2end__ = __tc__.Duration() / FLAGS_repeat * 1000; \
+    double __end2end__ = __tc__.Duration() / __repeat__ * 1000; \
     std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl; \
     __ss__ << "End2End(ms): " << __end2end__ << "ms." << std::endl; \
   } \
-  if (FLAGS_collect_memory_info) { \
+  if (__config_info__["collect_memory_info"] == "true") { \
     float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \
     float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \
     float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \
@@ -74,5 +83,5 @@
     __resource_moniter__.Stop(); \
   } \
   fastdeploy::benchmark::ResultManager::SaveBenchmarkResult(__ss__.str(), \
-      FLAGS_result_path); \
+      __config_info__["result_path"]); \
 }
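
Taken together, the hunks above move every benchmark setting (warmup, repeat, profile_mode, collect_memory_info, sampling_interval, device_id, result_path) out of individual gflags and into a string key/value map filled by ResultManager::LoadBenchmarkConfig from the file passed as FLAGS_config_path. Below is a minimal C++ sketch of that consumer pattern; the include path and the standalone main() are assumptions for illustration, not code from this PR.

#include <iostream>
#include <string>
#include <unordered_map>

#include "fastdeploy/benchmark/utils.h"  // ResultManager (header path assumed)

int main() {
  // Same call the macro makes: parse the config file into string key/value pairs.
  std::unordered_map<std::string, std::string> config_info;
  fastdeploy::benchmark::ResultManager::LoadBenchmarkConfig("config.txt",
                                                            &config_info);
  // Every value is a string, so numeric settings go through std::stoi and
  // boolean settings are compared against the literal "true", exactly as in
  // the macro above.
  int warmup = std::stoi(config_info["warmup"]);
  int repeat = std::stoi(config_info["repeat"]);
  bool collect_memory = (config_info["collect_memory_info"] == "true");
  std::cout << "warmup=" << warmup << ", repeat=" << repeat
            << ", collect_memory_info=" << std::boolalpha << collect_memory
            << std::endl;
  return 0;
}

The config file itself then only needs one entry per key read above; its exact line format is defined by LoadBenchmarkConfig and is not shown in this diff.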