mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
[Benchmark] Add SaveBenchmarkResult func for benchmark (#1442)
* add GPL lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint * rm pmap and use mem api * rm pmap and use mem api * add mem api * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * deal with comments * fixed enable_paddle_to_trt * add log for paddle_trt * support ppcls benchmark * use new trt option api * update benchmark info * simplify benchmark.cc * simplify benchmark.cc * deal with comments * Add ppseg && ppocr benchmark * add OCR rec img * add ocr benchmark * fixed trt shape * add trt shape * resolve conflict * add ENABLE_BENCHMARK define * Add ClassifyDiff * Add Resize for ClassifyResult * deal with comments * add convert info script * resolve conflict * Add SaveBenchmarkResult func * fixed bug * fixed bug * fixed bug --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
This commit is contained in:
@@ -36,4 +36,7 @@ else()
|
|||||||
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
|
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
|
||||||
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
|
target_link_libraries(benchmark_ppocr_rec ${FASTDEPLOY_LIBS} gflags)
|
||||||
endif()
|
endif()
|
||||||
install_fastdeploy_libraries(${CMAKE_CURRENT_BINARY_DIR})
|
# only for Android ADB test
|
||||||
|
if(ANDROID)
|
||||||
|
install_fastdeploy_libraries(${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
endif()
|
||||||
|
0
benchmark/cpp/benchmark_ppocr_cls.cc
Normal file → Executable file
0
benchmark/cpp/benchmark_ppocr_cls.cc
Normal file → Executable file
0
benchmark/cpp/benchmark_ppocr_det.cc
Normal file → Executable file
0
benchmark/cpp/benchmark_ppocr_det.cc
Normal file → Executable file
0
benchmark/cpp/benchmark_ppocr_rec.cc
Normal file → Executable file
0
benchmark/cpp/benchmark_ppocr_rec.cc
Normal file → Executable file
0
benchmark/cpp/benchmark_precision_ppyolov8.cc
Normal file → Executable file
0
benchmark/cpp/benchmark_precision_ppyolov8.cc
Normal file → Executable file
0
benchmark/cpp/benchmark_yolov5.cc
Normal file → Executable file
0
benchmark/cpp/benchmark_yolov5.cc
Normal file → Executable file
@@ -44,6 +44,7 @@ DEFINE_bool(
|
|||||||
DEFINE_bool(
|
DEFINE_bool(
|
||||||
collect_memory_info, false, "Whether to collect memory info");
|
collect_memory_info, false, "Whether to collect memory info");
|
||||||
DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");
|
DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");
|
||||||
|
DEFINE_string(result_path, "benchmark.txt", "Path of benchmark result file.");
|
||||||
|
|
||||||
static void PrintUsage() {
|
static void PrintUsage() {
|
||||||
std::cout << "Usage: infer_demo --model model_path --image img_path --device "
|
std::cout << "Usage: infer_demo --model model_path --image img_path --device "
|
||||||
@@ -92,6 +93,9 @@ static void PrintBenchmarkInfo() {
|
|||||||
<< "ms" << std::endl;
|
<< "ms" << std::endl;
|
||||||
}
|
}
|
||||||
std::cout << ss.str() << std::endl;
|
std::cout << ss.str() << std::endl;
|
||||||
|
// Save benchmark info
|
||||||
|
fastdeploy::benchmark::ResultManager::SaveBenchmarkResult(ss.str(),
|
||||||
|
FLAGS_result_path);
|
||||||
#endif
|
#endif
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@@ -23,6 +23,8 @@
|
|||||||
return 0; \
|
return 0; \
|
||||||
} \
|
} \
|
||||||
auto __im__ = cv::imread(FLAGS_image); \
|
auto __im__ = cv::imread(FLAGS_image); \
|
||||||
|
std::stringstream __ss__; \
|
||||||
|
__ss__.precision(6); \
|
||||||
fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__( \
|
fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__( \
|
||||||
FLAGS_sampling_interval, FLAGS_device_id); \
|
FLAGS_sampling_interval, FLAGS_device_id); \
|
||||||
if (FLAGS_collect_memory_info) { \
|
if (FLAGS_collect_memory_info) { \
|
||||||
@@ -35,6 +37,7 @@
|
|||||||
} \
|
} \
|
||||||
double __profile_time__ = MODEL_NAME.GetProfileTime() * 1000; \
|
double __profile_time__ = MODEL_NAME.GetProfileTime() * 1000; \
|
||||||
std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
|
std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
|
||||||
|
__ss__ << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
|
||||||
} else { \
|
} else { \
|
||||||
std::cout << "Warmup " << FLAGS_warmup << " times..." << std::endl; \
|
std::cout << "Warmup " << FLAGS_warmup << " times..." << std::endl; \
|
||||||
for (int __i__ = 0; __i__ < FLAGS_warmup; __i__++) { \
|
for (int __i__ = 0; __i__ < FLAGS_warmup; __i__++) { \
|
||||||
@@ -56,14 +59,20 @@
|
|||||||
__tc__.End(); \
|
__tc__.End(); \
|
||||||
double __end2end__ = __tc__.Duration() / FLAGS_repeat * 1000; \
|
double __end2end__ = __tc__.Duration() / FLAGS_repeat * 1000; \
|
||||||
std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl; \
|
std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl; \
|
||||||
|
__ss__ << "End2End(ms): " << __end2end__ << "ms." << std::endl; \
|
||||||
} \
|
} \
|
||||||
if (FLAGS_collect_memory_info) { \
|
if (FLAGS_collect_memory_info) { \
|
||||||
float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \
|
float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \
|
||||||
float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \
|
float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \
|
||||||
float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \
|
float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \
|
||||||
std::cout << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl; \
|
std::cout << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl; \
|
||||||
|
__ss__ << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl; \
|
||||||
std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \
|
std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \
|
||||||
|
__ss__ << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \
|
||||||
std::cout << "gpu_util: " << __gpu_util__ << std::endl; \
|
std::cout << "gpu_util: " << __gpu_util__ << std::endl; \
|
||||||
|
/* Save the GPU utilization, matching the gpu_util line printed to stdout. */ \
__ss__ << "gpu_util: " << __gpu_util__ << std::endl; \
|
||||||
__resource_moniter__.Stop(); \
|
__resource_moniter__.Stop(); \
|
||||||
} \
|
} \
|
||||||
|
fastdeploy::benchmark::ResultManager::SaveBenchmarkResult(__ss__.str(), \
|
||||||
|
FLAGS_result_path); \
|
||||||
}
|
}
|
||||||
|
15
fastdeploy/benchmark/utils.cc
Executable file → Normal file
15
fastdeploy/benchmark/utils.cc
Executable file → Normal file
@@ -350,6 +350,21 @@ TensorDiff ResultManager::CalculateDiffStatis(const FDTensor& lhs,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ResultManager::SaveBenchmarkResult(const std::string& res,
|
||||||
|
const std::string& path) {
|
||||||
|
if (path.empty()) {
|
||||||
|
FDERROR << "Benchmark data path can not be empty!" << std::endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto openmode = std::ios::app;
|
||||||
|
std::ofstream fs(path, openmode);
|
||||||
|
if (!fs.is_open()) {
|
||||||
|
FDERROR << "Fail to open result file: " << path << std::endl;
|
||||||
|
}
|
||||||
|
fs << res;
|
||||||
|
fs.close();
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(ENABLE_VISION)
|
#if defined(ENABLE_VISION)
|
||||||
bool ResultManager::SaveDetectionResult(const vision::DetectionResult& res,
|
bool ResultManager::SaveDetectionResult(const vision::DetectionResult& res,
|
||||||
const std::string& path) {
|
const std::string& path) {
|
||||||
|
@@ -138,6 +138,9 @@ struct FASTDEPLOY_DECL ResultManager {
|
|||||||
/// Calculate diff value between two FDTensor results.
|
/// Calculate diff value between two FDTensor results.
|
||||||
static TensorDiff CalculateDiffStatis(const FDTensor& lhs,
|
static TensorDiff CalculateDiffStatis(const FDTensor& lhs,
|
||||||
const FDTensor& rhs);
|
const FDTensor& rhs);
|
||||||
|
/// Append the benchmark result text `res` to the file at `path`.
|
||||||
|
static void SaveBenchmarkResult(const std::string& res,
|
||||||
|
const std::string& path);
|
||||||
#if defined(ENABLE_VISION)
|
#if defined(ENABLE_VISION)
|
||||||
/// Save & Load functions for basic results.
|
/// Save & Load functions for basic results.
|
||||||
static bool SaveDetectionResult(const vision::DetectionResult& res,
|
static bool SaveDetectionResult(const vision::DetectionResult& res,
|
||||||
|
Reference in New Issue
Block a user