mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-17 22:21:48 +08:00
[Benchmark] Add run_benchmark_cpu.sh (#1465)
* add GPL lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint * rm pmap and use mem api * rm pmap and use mem api * add mem api * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * deal with comments * fixed enable_paddle_to_trt * add log for paddle_trt * support ppcls benchmark * use new trt option api * update benchmark info * simplify benchmark.cc * simplify benchmark.cc * deal with comments * Add ppseg && ppocr benchmark * add OCR rec img * add ocr benchmark * fixed trt shape * add trt shape * resolve conflict * add ENABLE_BENCHMARK define * Add ClassifyDiff * Add Resize for ClassifyResult * deal with comments * add convert info script * resolve conflict * Add SaveBenchmarkResult func * fixed bug * fixed bug * fixed bug * add config.txt for option * fixed bug * fixed bug * fixed bug * add benchmark.sh * mv thread_nums from 8 to 1 * deal with comments * deal with comments * fixed readme * deal with comments --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
This commit is contained in:
@@ -11,7 +11,6 @@ include_directories(${FASTDEPLOY_INCS})
|
||||
add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc)
|
||||
add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc)
|
||||
add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc)
|
||||
add_executable(benchmark_precision_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_precision_ppyolov8.cc)
|
||||
add_executable(benchmark_ppseg ${PROJECT_SOURCE_DIR}/benchmark_ppseg.cc)
|
||||
add_executable(benchmark_ppocr_det ${PROJECT_SOURCE_DIR}/benchmark_ppocr_det.cc)
|
||||
add_executable(benchmark_ppocr_cls ${PROJECT_SOURCE_DIR}/benchmark_ppocr_cls.cc)
|
||||
@@ -21,7 +20,6 @@ if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
|
||||
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags pthread)
|
||||
@@ -30,7 +28,6 @@ else()
|
||||
target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppseg ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppocr_det ${FASTDEPLOY_LIBS} gflags)
|
||||
target_link_libraries(benchmark_ppocr_cls ${FASTDEPLOY_LIBS} gflags)
|
||||
|
77
benchmark/cpp/README.md
Normal file → Executable file
77
benchmark/cpp/README.md
Normal file → Executable file
@@ -11,26 +11,37 @@
|
||||
|
||||
运行FastDeploy C++ Benchmark,需先准备好相应的环境,并在ENABLE_BENCHMARK=ON模式下从源码编译FastDeploy C++ SDK。以下将按照硬件维度,来说明相应的系统环境要求。不同环境下的详细要求,请参考[FastDeploy环境要求](../../docs/cn/build_and_install)
|
||||
|
||||
## 2. Benchmark 参数设置说明
|
||||
## 2. Benchmark 设置说明
|
||||
|
||||
<div id="参数设置说明"></div>
|
||||
具体flags.h提供选项如下:
|
||||
|
||||
<div id="选项设置说明"></div>
|
||||
|
||||
| 参数 | 作用 |
|
||||
| 选项 | 作用 |
|
||||
| -------------------- | ------------------------------------------ |
|
||||
| --model | 模型路径 |
|
||||
| --image | 图片路径 |
|
||||
| --device | 选择 CPU/GPU/XPU,默认为 CPU |
|
||||
| --cpu_thread_nums | CPU 线程数,默认为 8 |
|
||||
| --device_id | GPU/XPU 卡号,默认为 0 |
|
||||
| --warmup | 跑benchmark的warmup次数,默认为 200 |
|
||||
| --repeat | 跑benchmark的循环次数,默认为 1000 |
|
||||
| --profile_mode | 指定需要测试性能的模式,可选值为`[runtime, end2end]`,默认为 runtime |
|
||||
| --include_h2d_d2h | 是否把H2D+D2H的耗时统计在内,该参数只在profile_mode为runtime时有效,默认为 false |
|
||||
| --backend | 指定后端类型,有default, ort, ov, trt, paddle, paddle_trt, lite 等,为default时,会自动选择最优后端,推荐显式指定具体的backend。默认为 default |
|
||||
| --use_fp16 | 是否开启fp16,当前只对 trt, paddle-trt, lite后端有效,默认为 false |
|
||||
| --collect_memory_info | 是否记录 cpu/gpu memory信息,默认 false |
|
||||
| --sampling_interval | 记录 cpu/gpu memory信息采样时间间隔,单位ms,默认为 50 |
|
||||
| --config_path | config.txt路径,包含具体设备、后端等信息 |
|
||||
|
||||
具体config.txt包含信息含义如下:
|
||||
|
||||
<div id="参数设置说明"></div>
|
||||
|
||||
| 参数 | 作用 |
|
||||
| -------------------- | ------------------------------------------ |
|
||||
| device | 选择 CPU/GPU/XPU,默认为 CPU |
|
||||
| device_id | GPU/XPU 卡号,默认为 0 |
|
||||
| cpu_thread_nums | CPU 线程数,默认为 1 |
|
||||
| warmup | 跑benchmark的warmup次数,默认为 200 |
|
||||
| repeat | 跑benchmark的循环次数,默认为 1000 |
|
||||
| backend | 指定后端类型,有default, ort, ov, trt, paddle, paddle_trt, lite 等,为default时,会自动选择最优后端,推荐显式指定具体的backend。默认为 default |
|
||||
| profile_mode | 指定需要测试性能的模式,可选值为`[runtime, end2end]`,默认为 runtime |
|
||||
| include_h2d_d2h | 是否把H2D+D2H的耗时统计在内,该参数只在profile_mode为runtime时有效,默认为 false |
|
||||
| use_fp16 | 是否开启fp16,当前只对 trt, paddle-trt, lite后端有效,默认为 false |
|
||||
| collect_memory_info | 是否记录 cpu/gpu memory信息,默认 false |
|
||||
| sampling_interval | 记录 cpu/gpu memory信息采样时间间隔,单位ms,默认为 50 |
|
||||
| precision_compare | 是否进行精度比较,默认为 false |
|
||||
| result_path | 记录 Benchmark 数据的 txt 文件路径 |
|
||||
|
||||
## 3. X86_64 CPU 和 NVIDIA GPU 环境下运行 Benchmark
|
||||
|
||||
@@ -93,41 +104,11 @@ tar -zxvf yolov8_s_500e_coco.tgz
|
||||
|
||||
```bash
|
||||
|
||||
# 统计性能
|
||||
# CPU
|
||||
# Paddle Inference
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend paddle --profile_mode runtime
|
||||
|
||||
# ONNX Runtime
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend ort --profile_mode runtime
|
||||
|
||||
# OpenVINO
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend ov --profile_mode runtime
|
||||
|
||||
# GPU
|
||||
# Paddle Inference
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle --profile_mode runtime --warmup 200 --repeat 2000
|
||||
|
||||
# Paddle Inference + TensorRT
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle_trt --profile_mode runtime --warmup 200 --repeat 2000
|
||||
|
||||
# Paddle Inference + TensorRT + FP16
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle --profile_mode runtime --warmup 200 --repeat 2000 --use_fp16
|
||||
|
||||
# ONNX Runtime
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend ort --profile_mode runtime --warmup 200 --repeat 2000
|
||||
|
||||
# TensorRT
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend paddle --profile_mode runtime --warmup 200 --repeat 2000
|
||||
|
||||
# TensorRT + FP16
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device gpu --device_id 0 --backend trt --profile_mode runtime --warmup 200 --repeat 2000 --use_fp16
|
||||
|
||||
# 统计内存显存占用
|
||||
# 增加--collect_memory_info选项
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --device cpu --cpu_thread_nums 8 --backend paddle --profile_mode runtime --collect_memory_info
|
||||
# 统计性能,用户根据需求修改config.txt文件,具体含义参考上表
|
||||
# eg:如果想测paddle gpu backend,将device改为gpu,backend修改为paddle即可
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --config_path config.txt
|
||||
```
|
||||
注意,为避免对性能统计产生影响,测试性能时,最好不要开启内存显存统计的功能,当指定--collect_memory_info参数时,只有内存显存参数是稳定可靠的。更多参数设置,请参考[参数设置说明](#参数设置说明)
|
||||
注意,为避免对性能统计产生影响,测试性能时,最好不要开启内存显存统计的功能,当把collect_memory_info参数设置为true时,只有内存显存参数是稳定可靠的。更多参数设置,请参考[参数设置说明](#参数设置说明)
|
||||
|
||||
|
||||
## 4. ARM CPU 环境下运行 Benchmark
|
||||
|
9
benchmark/cpp/benchmark.sh
Executable file
9
benchmark/cpp/benchmark.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
# Run all models on the hardware and backend specified in config.txt
|
||||
./benchmark_ppseg --model PP_HumanSegV2_Lite_192x192_with_argmax_infer --image portrait_heng.jpg --config_path config.txt
|
||||
./benchmark_ppseg --model PP_HumanSegV2_Mobile_192x192_with_argmax_infer --image portrait_heng.jpg --config_path config.txt
|
||||
./benchmark_ppcls --model MobileNetV2_ssld_infer --image ILSVRC2012_val_00000010.jpeg --config_path config.txt
|
||||
./benchmark_ppocr_det --model ch_PP-OCRv3_det_infer --image 12.jpg --config_path config.txt
|
||||
./benchmark_ppocr_cls --model ch_ppocr_mobile_v2.0_cls_infer --image rec_img.jpg --config_path config.txt
|
||||
./benchmark_ppocr_rec --model ch_PP-OCRv3_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path config.txt
|
||||
./benchmark_ppyolov8 --model yolov8_s_500e_coco --image 000000014439.jpg --config_path config.txt
|
||||
./benchmark_yolov5 --model yolov5s.onnx --image 000000014439.jpg --config_path config.txt
|
@@ -27,8 +27,11 @@ int main(int argc, char* argv[]) {
|
||||
return -1;
|
||||
}
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
std::unordered_map<std::string, std::string> config_info;
|
||||
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
|
||||
&config_info);
|
||||
// Set max_batch_size 1 for best performance
|
||||
if (FLAGS_backend == "paddle_trt") {
|
||||
if (config_info["backend"] == "paddle_trt") {
|
||||
option.trt_option.max_batch_size = 1;
|
||||
}
|
||||
auto model_file = FLAGS_model + sep + "inference.pdmodel";
|
||||
@@ -37,6 +40,7 @@ int main(int argc, char* argv[]) {
|
||||
auto model_ppcls = vision::classification::PaddleClasModel(
|
||||
model_file, params_file, config_file, option);
|
||||
vision::ClassifyResult res;
|
||||
if (config_info["precision_compare"] == "true") {
|
||||
// Run once at least
|
||||
model_ppcls.Predict(im, &res);
|
||||
// 1. Test result diff
|
||||
@@ -56,6 +60,7 @@ int main(int argc, char* argv[]) {
|
||||
std::cout << "Scores diff: mean=" << cls_diff.scores.mean
|
||||
<< ", max=" << cls_diff.scores.max
|
||||
<< ", min=" << cls_diff.scores.min << std::endl;
|
||||
}
|
||||
BENCHMARK_MODEL(model_ppcls, model_ppcls.Predict(im, &res))
|
||||
#endif
|
||||
return 0;
|
||||
|
27
benchmark/cpp/benchmark_ppocr_cls.cc
Executable file → Normal file
27
benchmark/cpp/benchmark_ppocr_cls.cc
Executable file → Normal file
@@ -16,6 +16,13 @@
|
||||
#include "macros.h"
|
||||
#include "option.h"
|
||||
|
||||
namespace vision = fastdeploy::vision;
|
||||
namespace benchmark = fastdeploy::benchmark;
|
||||
|
||||
DEFINE_string(trt_shape, "1,3,48,10:4,3,48,320:8,3,48,1024",
|
||||
"Set min/opt/max shape for trt/paddle_trt backend."
|
||||
"eg:--trt_shape 1,3,48,10:4,3,48,320:8,3,48,1024");
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
|
||||
// Initialization
|
||||
@@ -24,20 +31,27 @@ int main(int argc, char* argv[]) {
|
||||
return -1;
|
||||
}
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
std::unordered_map<std::string, std::string> config_info;
|
||||
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
|
||||
&config_info);
|
||||
// Classification Model
|
||||
auto cls_model_file = FLAGS_model + sep + "inference.pdmodel";
|
||||
auto cls_params_file = FLAGS_model + sep + "inference.pdiparams";
|
||||
if (FLAGS_backend == "paddle_trt") {
|
||||
if (config_info["backend"] == "paddle_trt") {
|
||||
option.paddle_infer_option.collect_trt_shape = true;
|
||||
}
|
||||
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
|
||||
option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
|
||||
{8, 3, 48, 1024});
|
||||
if (config_info["backend"] == "paddle_trt" ||
|
||||
config_info["backend"] == "trt") {
|
||||
std::vector<std::vector<int32_t>> trt_shapes =
|
||||
benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape);
|
||||
option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1],
|
||||
trt_shapes[2]);
|
||||
}
|
||||
auto model_ppocr_cls = fastdeploy::vision::ocr::Classifier(
|
||||
cls_model_file, cls_params_file, option);
|
||||
auto model_ppocr_cls =
|
||||
vision::ocr::Classifier(cls_model_file, cls_params_file, option);
|
||||
int32_t res_label;
|
||||
float res_score;
|
||||
if (config_info["precision_compare"] == "true") {
|
||||
// Run once at least
|
||||
model_ppocr_cls.Predict(im, &res_label, &res_score);
|
||||
// 1. Test result diff
|
||||
@@ -50,6 +64,7 @@ int main(int argc, char* argv[]) {
|
||||
std::cout << "PPOCR Cls label diff: " << ppocr_cls_label_diff << std::endl;
|
||||
std::cout << "PPOCR Cls score diff: " << abs(ppocr_cls_score_diff)
|
||||
<< std::endl;
|
||||
}
|
||||
BENCHMARK_MODEL(model_ppocr_cls,
|
||||
model_ppocr_cls.Predict(im, &res_label, &res_score));
|
||||
#endif
|
||||
|
20
benchmark/cpp/benchmark_ppocr_det.cc
Executable file → Normal file
20
benchmark/cpp/benchmark_ppocr_det.cc
Executable file → Normal file
@@ -19,6 +19,10 @@
|
||||
namespace vision = fastdeploy::vision;
|
||||
namespace benchmark = fastdeploy::benchmark;
|
||||
|
||||
DEFINE_string(trt_shape, "1,3,64,64:1,3,640,640:1,3,960,960",
|
||||
"Set min/opt/max shape for trt/paddle_trt backend."
|
||||
"eg:--trt_shape 1,3,64,64:1,3,640,640:1,3,960,960");
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
|
||||
// Initialization
|
||||
@@ -27,19 +31,26 @@ int main(int argc, char* argv[]) {
|
||||
return -1;
|
||||
}
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
std::unordered_map<std::string, std::string> config_info;
|
||||
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
|
||||
&config_info);
|
||||
// Detection Model
|
||||
auto det_model_file = FLAGS_model + sep + "inference.pdmodel";
|
||||
auto det_params_file = FLAGS_model + sep + "inference.pdiparams";
|
||||
if (FLAGS_backend == "paddle_trt") {
|
||||
if (config_info["backend"] == "paddle_trt") {
|
||||
option.paddle_infer_option.collect_trt_shape = true;
|
||||
}
|
||||
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
|
||||
option.trt_option.SetShape("x", {1, 3, 64, 64}, {1, 3, 640, 640},
|
||||
{1, 3, 960, 960});
|
||||
if (config_info["backend"] == "paddle_trt" ||
|
||||
config_info["backend"] == "trt") {
|
||||
std::vector<std::vector<int32_t>> trt_shapes =
|
||||
benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape);
|
||||
option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1],
|
||||
trt_shapes[2]);
|
||||
}
|
||||
auto model_ppocr_det =
|
||||
vision::ocr::DBDetector(det_model_file, det_params_file, option);
|
||||
std::vector<std::array<int, 8>> res;
|
||||
if (config_info["precision_compare"] == "true") {
|
||||
// Run once at least
|
||||
model_ppocr_det.Predict(im, &res);
|
||||
// 1. Test result diff
|
||||
@@ -57,6 +68,7 @@ int main(int argc, char* argv[]) {
|
||||
std::cout << "PPOCR Boxes diff: mean=" << ppocr_det_diff.boxes.mean
|
||||
<< ", max=" << ppocr_det_diff.boxes.max
|
||||
<< ", min=" << ppocr_det_diff.boxes.min << std::endl;
|
||||
}
|
||||
BENCHMARK_MODEL(model_ppocr_det, model_ppocr_det.Predict(im, &res));
|
||||
#endif
|
||||
return 0;
|
||||
|
24
benchmark/cpp/benchmark_ppocr_rec.cc
Executable file → Normal file
24
benchmark/cpp/benchmark_ppocr_rec.cc
Executable file → Normal file
@@ -16,7 +16,13 @@
|
||||
#include "macros.h"
|
||||
#include "option.h"
|
||||
|
||||
namespace vision = fastdeploy::vision;
|
||||
namespace benchmark = fastdeploy::benchmark;
|
||||
|
||||
DEFINE_string(rec_label_file, "", "Path of Recognization label file of PPOCR.");
|
||||
DEFINE_string(trt_shape, "1,3,48,10:4,3,48,320:8,3,48,2304",
|
||||
"Set min/opt/max shape for trt/paddle_trt backend."
|
||||
"eg:--trt_shape 1,3,48,10:4,3,48,320:8,3,48,2304");
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
|
||||
@@ -26,20 +32,27 @@ int main(int argc, char* argv[]) {
|
||||
return -1;
|
||||
}
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
std::unordered_map<std::string, std::string> config_info;
|
||||
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
|
||||
&config_info);
|
||||
// Recognition Model
|
||||
auto rec_model_file = FLAGS_model + sep + "inference.pdmodel";
|
||||
auto rec_params_file = FLAGS_model + sep + "inference.pdiparams";
|
||||
if (FLAGS_backend == "paddle_trt") {
|
||||
if (config_info["backend"] == "paddle_trt") {
|
||||
option.paddle_infer_option.collect_trt_shape = true;
|
||||
}
|
||||
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
|
||||
option.trt_option.SetShape("x", {1, 3, 48, 10}, {4, 3, 48, 320},
|
||||
{8, 3, 48, 2304});
|
||||
if (config_info["backend"] == "paddle_trt" ||
|
||||
config_info["backend"] == "trt") {
|
||||
std::vector<std::vector<int32_t>> trt_shapes =
|
||||
benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape);
|
||||
option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1],
|
||||
trt_shapes[2]);
|
||||
}
|
||||
auto model_ppocr_rec = fastdeploy::vision::ocr::Recognizer(
|
||||
auto model_ppocr_rec = vision::ocr::Recognizer(
|
||||
rec_model_file, rec_params_file, FLAGS_rec_label_file, option);
|
||||
std::string text;
|
||||
float rec_score;
|
||||
if (config_info["precision_compare"] == "true") {
|
||||
// Run once at least
|
||||
model_ppocr_rec.Predict(im, &text, &rec_score);
|
||||
// 1. Test result diff
|
||||
@@ -52,6 +65,7 @@ int main(int argc, char* argv[]) {
|
||||
std::cout << "PPOCR Rec text diff: " << ppocr_rec_text_diff << std::endl;
|
||||
std::cout << "PPOCR Rec score diff: " << abs(ppocr_rec_score_diff)
|
||||
<< std::endl;
|
||||
}
|
||||
BENCHMARK_MODEL(model_ppocr_rec,
|
||||
model_ppocr_rec.Predict(im, &text, &rec_score));
|
||||
#endif
|
||||
|
20
benchmark/cpp/benchmark_ppseg.cc
Executable file → Normal file
20
benchmark/cpp/benchmark_ppseg.cc
Executable file → Normal file
@@ -19,6 +19,10 @@
|
||||
namespace vision = fastdeploy::vision;
|
||||
namespace benchmark = fastdeploy::benchmark;
|
||||
|
||||
DEFINE_string(trt_shape, "1,3,192,192:1,3,192,192:1,3,192,192",
|
||||
"Set min/opt/max shape for trt/paddle_trt backend."
|
||||
"eg:--trt_shape 1,3,192,192:1,3,192,192:1,3,192,192");
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
|
||||
// Initialization
|
||||
@@ -27,19 +31,26 @@ int main(int argc, char* argv[]) {
|
||||
return -1;
|
||||
}
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
std::unordered_map<std::string, std::string> config_info;
|
||||
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
|
||||
&config_info);
|
||||
auto model_file = FLAGS_model + sep + "model.pdmodel";
|
||||
auto params_file = FLAGS_model + sep + "model.pdiparams";
|
||||
auto config_file = FLAGS_model + sep + "deploy.yaml";
|
||||
if (FLAGS_backend == "paddle_trt") {
|
||||
if (config_info["backend"] == "paddle_trt") {
|
||||
option.paddle_infer_option.collect_trt_shape = true;
|
||||
}
|
||||
if (FLAGS_backend == "paddle_trt" || FLAGS_backend == "trt") {
|
||||
option.trt_option.SetShape("x", {1, 3, 192, 192}, {1, 3, 192, 192},
|
||||
{1, 3, 192, 192});
|
||||
if (config_info["backend"] == "paddle_trt" ||
|
||||
config_info["backend"] == "trt") {
|
||||
std::vector<std::vector<int32_t>> trt_shapes =
|
||||
benchmark::ResultManager::GetInputShapes(FLAGS_trt_shape);
|
||||
option.trt_option.SetShape("x", trt_shapes[0], trt_shapes[1],
|
||||
trt_shapes[2]);
|
||||
}
|
||||
auto model_ppseg = vision::segmentation::PaddleSegModel(
|
||||
model_file, params_file, config_file, option);
|
||||
vision::SegmentationResult res;
|
||||
if (config_info["precision_compare"] == "true") {
|
||||
// Run once at least
|
||||
model_ppseg.Predict(im, &res);
|
||||
// 1. Test result diff
|
||||
@@ -62,6 +73,7 @@ int main(int argc, char* argv[]) {
|
||||
<< ", max=" << seg_diff.scores.max
|
||||
<< ", min=" << seg_diff.scores.min << std::endl;
|
||||
}
|
||||
}
|
||||
BENCHMARK_MODEL(model_ppseg, model_ppseg.Predict(im, &res))
|
||||
auto vis_im = vision::VisSegmentation(im, res, 0.5);
|
||||
cv::imwrite("vis_result.jpg", vis_im);
|
||||
|
70
benchmark/cpp/benchmark_ppyolov8.cc
Executable file → Normal file
70
benchmark/cpp/benchmark_ppyolov8.cc
Executable file → Normal file
@@ -16,6 +16,11 @@
|
||||
#include "macros.h"
|
||||
#include "option.h"
|
||||
|
||||
namespace vision = fastdeploy::vision;
|
||||
namespace benchmark = fastdeploy::benchmark;
|
||||
|
||||
DEFINE_bool(no_nms, false, "Whether the model contains nms.");
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
|
||||
// Initialization
|
||||
@@ -24,16 +29,73 @@ int main(int argc, char* argv[]) {
|
||||
return -1;
|
||||
}
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
std::unordered_map<std::string, std::string> config_info;
|
||||
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
|
||||
&config_info);
|
||||
auto model_file = FLAGS_model + sep + "model.pdmodel";
|
||||
auto params_file = FLAGS_model + sep + "model.pdiparams";
|
||||
auto config_file = FLAGS_model + sep + "infer_cfg.yml";
|
||||
auto model_ppyolov8 = fastdeploy::vision::detection::PaddleYOLOv8(
|
||||
model_file, params_file, config_file, option);
|
||||
fastdeploy::vision::DetectionResult res;
|
||||
auto model_ppyolov8 = vision::detection::PaddleYOLOv8(model_file, params_file,
|
||||
config_file, option);
|
||||
vision::DetectionResult res;
|
||||
if (config_info["precision_compare"] == "true") {
|
||||
// Run once at least
|
||||
model_ppyolov8.Predict(im, &res);
|
||||
// 1. Test result diff
|
||||
std::cout << "=============== Test result diff =================\n";
|
||||
// Save result to -> disk.
|
||||
std::string det_result_path = "ppyolov8_result.txt";
|
||||
benchmark::ResultManager::SaveDetectionResult(res, det_result_path);
|
||||
// Load result from <- disk.
|
||||
vision::DetectionResult res_loaded;
|
||||
benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path);
|
||||
// Calculate diff between two results.
|
||||
auto det_diff =
|
||||
benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
|
||||
std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
|
||||
<< ", max=" << det_diff.boxes.max
|
||||
<< ", min=" << det_diff.boxes.min << std::endl;
|
||||
std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
|
||||
<< ", max=" << det_diff.labels.max
|
||||
<< ", min=" << det_diff.labels.min << std::endl;
|
||||
// 2. Test tensor diff
|
||||
std::cout << "=============== Test tensor diff =================\n";
|
||||
std::vector<vision::DetectionResult> batch_res;
|
||||
std::vector<fastdeploy::FDTensor> input_tensors, output_tensors;
|
||||
std::vector<cv::Mat> imgs;
|
||||
imgs.push_back(im);
|
||||
std::vector<vision::FDMat> fd_images = vision::WrapMat(imgs);
|
||||
|
||||
model_ppyolov8.GetPreprocessor().Run(&fd_images, &input_tensors);
|
||||
input_tensors[0].name = "image";
|
||||
input_tensors[1].name = "scale_factor";
|
||||
input_tensors[2].name = "im_shape";
|
||||
input_tensors.pop_back();
|
||||
model_ppyolov8.Infer(input_tensors, &output_tensors);
|
||||
model_ppyolov8.GetPostprocessor().Run(output_tensors, &batch_res);
|
||||
// Save tensor to -> disk.
|
||||
auto& tensor_dump = output_tensors[0];
|
||||
std::string det_tensor_path = "ppyolov8_tensor.txt";
|
||||
benchmark::ResultManager::SaveFDTensor(tensor_dump, det_tensor_path);
|
||||
// Load tensor from <- disk.
|
||||
fastdeploy::FDTensor tensor_loaded;
|
||||
benchmark::ResultManager::LoadFDTensor(&tensor_loaded, det_tensor_path);
|
||||
// Calculate diff between two tensors.
|
||||
auto det_tensor_diff = benchmark::ResultManager::CalculateDiffStatis(
|
||||
tensor_dump, tensor_loaded);
|
||||
std::cout << "Tensor diff: mean=" << det_tensor_diff.data.mean
|
||||
<< ", max=" << det_tensor_diff.data.max
|
||||
<< ", min=" << det_tensor_diff.data.min << std::endl;
|
||||
}
|
||||
// Run profiling
|
||||
if (FLAGS_no_nms) {
|
||||
model_ppyolov8.GetPostprocessor().ApplyDecodeAndNMS();
|
||||
}
|
||||
BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res))
|
||||
auto vis_im = fastdeploy::vision::VisDetection(im, res);
|
||||
auto vis_im = vision::VisDetection(im, res);
|
||||
cv::imwrite("vis_result.jpg", vis_im);
|
||||
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
@@ -1,91 +0,0 @@
|
||||
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "flags.h"
|
||||
#include "macros.h"
|
||||
#include "option.h"
|
||||
|
||||
namespace vision = fastdeploy::vision;
|
||||
namespace benchmark = fastdeploy::benchmark;
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
|
||||
// Initialization
|
||||
auto option = fastdeploy::RuntimeOption();
|
||||
if (!CreateRuntimeOption(&option, argc, argv, true)) {
|
||||
return -1;
|
||||
}
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
auto model_file = FLAGS_model + sep + "model.pdmodel";
|
||||
auto params_file = FLAGS_model + sep + "model.pdiparams";
|
||||
auto config_file = FLAGS_model + sep + "infer_cfg.yml";
|
||||
auto model_ppyolov8 = vision::detection::PaddleYOLOv8(model_file, params_file,
|
||||
config_file, option);
|
||||
vision::DetectionResult res;
|
||||
// Run once at least
|
||||
model_ppyolov8.Predict(im, &res);
|
||||
// 1. Test result diff
|
||||
std::cout << "=============== Test result diff =================\n";
|
||||
// Save result to -> disk.
|
||||
std::string det_result_path = "ppyolov8_result.txt";
|
||||
benchmark::ResultManager::SaveDetectionResult(res, det_result_path);
|
||||
// Load result from <- disk.
|
||||
vision::DetectionResult res_loaded;
|
||||
benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path);
|
||||
// Calculate diff between two results.
|
||||
auto det_diff =
|
||||
benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
|
||||
std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
|
||||
<< ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min
|
||||
<< std::endl;
|
||||
std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
|
||||
<< ", max=" << det_diff.labels.max
|
||||
<< ", min=" << det_diff.labels.min << std::endl;
|
||||
// 2. Test tensor diff
|
||||
std::cout << "=============== Test tensor diff =================\n";
|
||||
std::vector<vision::DetectionResult> batch_res;
|
||||
std::vector<fastdeploy::FDTensor> input_tensors, output_tensors;
|
||||
std::vector<cv::Mat> imgs;
|
||||
imgs.push_back(im);
|
||||
std::vector<vision::FDMat> fd_images = vision::WrapMat(imgs);
|
||||
|
||||
model_ppyolov8.GetPreprocessor().Run(&fd_images, &input_tensors);
|
||||
input_tensors[0].name = "image";
|
||||
input_tensors[1].name = "scale_factor";
|
||||
input_tensors[2].name = "im_shape";
|
||||
input_tensors.pop_back();
|
||||
model_ppyolov8.Infer(input_tensors, &output_tensors);
|
||||
model_ppyolov8.GetPostprocessor().Run(output_tensors, &batch_res);
|
||||
// Save tensor to -> disk.
|
||||
auto& tensor_dump = output_tensors[0];
|
||||
std::string det_tensor_path = "ppyolov8_tensor.txt";
|
||||
benchmark::ResultManager::SaveFDTensor(tensor_dump, det_tensor_path);
|
||||
// Load tensor from <- disk.
|
||||
fastdeploy::FDTensor tensor_loaded;
|
||||
benchmark::ResultManager::LoadFDTensor(&tensor_loaded, det_tensor_path);
|
||||
// Calculate diff between two tensors.
|
||||
auto det_tensor_diff =
|
||||
benchmark::ResultManager::CalculateDiffStatis(tensor_dump, tensor_loaded);
|
||||
std::cout << "Tensor diff: mean=" << det_tensor_diff.data.mean
|
||||
<< ", max=" << det_tensor_diff.data.max
|
||||
<< ", min=" << det_tensor_diff.data.min << std::endl;
|
||||
// 3. Run profiling
|
||||
BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res))
|
||||
auto vis_im = vision::VisDetection(im, res);
|
||||
cv::imwrite("vis_result.jpg", vis_im);
|
||||
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
@@ -27,8 +27,12 @@ int main(int argc, char* argv[]) {
|
||||
return -1;
|
||||
}
|
||||
auto im = cv::imread(FLAGS_image);
|
||||
std::unordered_map<std::string, std::string> config_info;
|
||||
benchmark::ResultManager::LoadBenchmarkConfig(FLAGS_config_path,
|
||||
&config_info);
|
||||
auto model_yolov5 = vision::detection::YOLOv5(FLAGS_model, "", option);
|
||||
vision::DetectionResult res;
|
||||
if (config_info["precision_compare"] == "true") {
|
||||
// Run once at least
|
||||
model_yolov5.Predict(im, &res);
|
||||
// 1. Test result diff
|
||||
@@ -43,11 +47,12 @@ int main(int argc, char* argv[]) {
|
||||
auto det_diff =
|
||||
benchmark::ResultManager::CalculateDiffStatis(res, res_loaded);
|
||||
std::cout << "Boxes diff: mean=" << det_diff.boxes.mean
|
||||
<< ", max=" << det_diff.boxes.max << ", min=" << det_diff.boxes.min
|
||||
<< std::endl;
|
||||
<< ", max=" << det_diff.boxes.max
|
||||
<< ", min=" << det_diff.boxes.min << std::endl;
|
||||
std::cout << "Label_ids diff: mean=" << det_diff.labels.mean
|
||||
<< ", max=" << det_diff.labels.max
|
||||
<< ", min=" << det_diff.labels.min << std::endl;
|
||||
}
|
||||
BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res))
|
||||
auto vis_im = vision::VisDetection(im, res);
|
||||
cv::imwrite("vis_result.jpg", vis_im);
|
||||
|
13
benchmark/cpp/config.txt
Executable file
13
benchmark/cpp/config.txt
Executable file
@@ -0,0 +1,13 @@
|
||||
device: cpu
|
||||
device_id: 0
|
||||
cpu_thread_nums: 1
|
||||
warmup: 200
|
||||
repeat: 1000
|
||||
backend: default
|
||||
profile_mode: runtime
|
||||
include_h2d_d2h: true
|
||||
use_fp16: false
|
||||
collect_memory_info: false
|
||||
sampling_interval: 1
|
||||
precision_compare: false
|
||||
result_path: benchmark_cpu.txt
|
@@ -14,6 +14,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include "gflags/gflags.h"
|
||||
#include "fastdeploy/benchmark/utils.h"
|
||||
|
||||
@@ -25,39 +26,19 @@ static const char sep = '/';
|
||||
|
||||
DEFINE_string(model, "", "Directory of the inference model.");
|
||||
DEFINE_string(image, "", "Path of the image file.");
|
||||
DEFINE_string(device, "cpu",
|
||||
"Type of inference device, support 'cpu/gpu/xpu'.");
|
||||
DEFINE_int32(device_id, 0, "device(gpu/xpu/...) id.");
|
||||
DEFINE_int32(warmup, 200, "Number of warmup for profiling.");
|
||||
DEFINE_int32(repeat, 1000, "Number of repeats for profiling.");
|
||||
DEFINE_string(profile_mode, "runtime", "runtime or end2end.");
|
||||
DEFINE_string(backend, "default",
|
||||
"The inference runtime backend, support: ['default', 'ort', "
|
||||
"'paddle', 'ov', 'trt', 'paddle_trt', 'lite']");
|
||||
DEFINE_int32(cpu_thread_nums, 8, "Set numbers of cpu thread.");
|
||||
DEFINE_bool(
|
||||
include_h2d_d2h, false, "Whether run profiling with h2d and d2h.");
|
||||
DEFINE_bool(
|
||||
use_fp16, false,
|
||||
"Whether to use FP16 mode, only support 'trt', 'paddle_trt' "
|
||||
"and 'lite' backend");
|
||||
DEFINE_bool(
|
||||
collect_memory_info, false, "Whether to collect memory info");
|
||||
DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");
|
||||
DEFINE_string(result_path, "benchmark.txt", "Path of benchmark result file.");
|
||||
DEFINE_string(config_path, "config.txt", "Path of benchmark config.");
|
||||
|
||||
static void PrintUsage() {
|
||||
std::cout << "Usage: infer_demo --model model_path --image img_path --device "
|
||||
"[cpu|gpu|xpu] --backend "
|
||||
"[default|ort|paddle|ov|trt|paddle_trt|lite] "
|
||||
"--use_fp16 false"
|
||||
std::cout << "Usage: infer_demo --model model_path --image img_path "
|
||||
"--config_path config.txt[Path of benchmark config.] "
|
||||
<< std::endl;
|
||||
std::cout << "Default value of device: cpu" << std::endl;
|
||||
std::cout << "Default value of backend: default" << std::endl;
|
||||
std::cout << "Default value of use_fp16: false" << std::endl;
|
||||
}
|
||||
|
||||
static void PrintBenchmarkInfo() {
|
||||
static void PrintBenchmarkInfo(std::unordered_map<std::string,
|
||||
std::string> config_info) {
|
||||
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
|
||||
// Get model name
|
||||
std::vector<std::string> model_names;
|
||||
@@ -71,31 +52,32 @@ static void PrintBenchmarkInfo() {
|
||||
ss.precision(3);
|
||||
ss << "\n======= Model Info =======\n";
|
||||
ss << "model_name: " << model_names[model_names.size() - 1] << std::endl;
|
||||
ss << "profile_mode: " << FLAGS_profile_mode << std::endl;
|
||||
if (FLAGS_profile_mode == "runtime") {
|
||||
ss << "include_h2d_d2h: " << FLAGS_include_h2d_d2h << std::endl;
|
||||
ss << "profile_mode: " << config_info["profile_mode"] << std::endl;
|
||||
if (config_info["profile_mode"] == "runtime") {
|
||||
ss << "include_h2d_d2h: " << config_info["include_h2d_d2h"] << std::endl;
|
||||
}
|
||||
ss << "\n======= Backend Info =======\n";
|
||||
ss << "warmup: " << FLAGS_warmup << std::endl;
|
||||
ss << "repeats: " << FLAGS_repeat << std::endl;
|
||||
ss << "device: " << FLAGS_device << std::endl;
|
||||
if (FLAGS_device == "gpu") {
|
||||
ss << "device_id: " << FLAGS_device_id << std::endl;
|
||||
ss << "warmup: " << config_info["warmup"] << std::endl;
|
||||
ss << "repeats: " << config_info["repeat"] << std::endl;
|
||||
ss << "device: " << config_info["device"] << std::endl;
|
||||
if (config_info["device"] == "gpu") {
|
||||
ss << "device_id: " << config_info["device_id"] << std::endl;
|
||||
ss << "use_fp16: " << config_info["use_fp16"] << std::endl;
|
||||
}
|
||||
ss << "backend: " << FLAGS_backend << std::endl;
|
||||
if (FLAGS_device == "cpu") {
|
||||
ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl;
|
||||
ss << "backend: " << config_info["backend"] << std::endl;
|
||||
if (config_info["device"] == "cpu") {
|
||||
ss << "cpu_thread_nums: " << config_info["cpu_thread_nums"] << std::endl;
|
||||
}
|
||||
ss << "use_fp16: " << FLAGS_use_fp16 << std::endl;
|
||||
ss << "collect_memory_info: " << FLAGS_collect_memory_info << std::endl;
|
||||
if (FLAGS_collect_memory_info) {
|
||||
ss << "sampling_interval: " << std::to_string(FLAGS_sampling_interval)
|
||||
ss << "collect_memory_info: "
|
||||
<< config_info["collect_memory_info"] << std::endl;
|
||||
if (config_info["collect_memory_info"] == "true") {
|
||||
ss << "sampling_interval: " << config_info["sampling_interval"]
|
||||
<< "ms" << std::endl;
|
||||
}
|
||||
std::cout << ss.str() << std::endl;
|
||||
// Save benchmark info
|
||||
fastdeploy::benchmark::ResultManager::SaveBenchmarkResult(ss.str(),
|
||||
FLAGS_result_path);
|
||||
config_info["result_path"]);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
@@ -22,15 +22,18 @@
|
||||
std::cerr << "Failed to initialize." << std::endl; \
|
||||
return 0; \
|
||||
} \
|
||||
auto __im__ = cv::imread(FLAGS_image); \
|
||||
std::unordered_map<std::string, std::string> __config_info__; \
|
||||
fastdeploy::benchmark::ResultManager::LoadBenchmarkConfig( \
|
||||
FLAGS_config_path, &__config_info__); \
|
||||
std::stringstream __ss__; \
|
||||
__ss__.precision(6); \
|
||||
fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__( \
|
||||
FLAGS_sampling_interval, FLAGS_device_id); \
|
||||
if (FLAGS_collect_memory_info) { \
|
||||
std::stoi(__config_info__["sampling_interval"]), \
|
||||
std::stoi(__config_info__["device_id"])); \
|
||||
if (__config_info__["collect_memory_info"] == "true") { \
|
||||
__resource_moniter__.Start(); \
|
||||
} \
|
||||
if (FLAGS_profile_mode == "runtime") { \
|
||||
if (__config_info__["profile_mode"] == "runtime") { \
|
||||
if (!BENCHMARK_FUNC) { \
|
||||
std::cerr << "Failed to predict." << std::endl; \
|
||||
return 0; \
|
||||
@@ -39,29 +42,35 @@
|
||||
std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
|
||||
__ss__ << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
|
||||
} else { \
|
||||
std::cout << "Warmup " << FLAGS_warmup << " times..." << std::endl; \
|
||||
for (int __i__ = 0; __i__ < FLAGS_warmup; __i__++) { \
|
||||
std::cout << "Warmup " \
|
||||
<< __config_info__["warmup"] \
|
||||
<< " times..." << std::endl; \
|
||||
int __warmup__ = std::stoi(__config_info__["warmup"]); \
|
||||
for (int __i__ = 0; __i__ < __warmup__; __i__++) { \
|
||||
if (!BENCHMARK_FUNC) { \
|
||||
std::cerr << "Failed to predict." << std::endl; \
|
||||
return 0; \
|
||||
} \
|
||||
} \
|
||||
std::cout << "Counting time..." << std::endl; \
|
||||
std::cout << "Repeat " << FLAGS_repeat << " times..." << std::endl; \
|
||||
std::cout << "Repeat " \
|
||||
<< __config_info__["repeat"] \
|
||||
<< " times..." << std::endl; \
|
||||
fastdeploy::TimeCounter __tc__; \
|
||||
__tc__.Start(); \
|
||||
for (int __i__ = 0; __i__ < FLAGS_repeat; __i__++) { \
|
||||
int __repeat__ = std::stoi(__config_info__["repeat"]); \
|
||||
for (int __i__ = 0; __i__ < __repeat__; __i__++) { \
|
||||
if (!BENCHMARK_FUNC) { \
|
||||
std::cerr << "Failed to predict." << std::endl; \
|
||||
return 0; \
|
||||
} \
|
||||
} \
|
||||
__tc__.End(); \
|
||||
double __end2end__ = __tc__.Duration() / FLAGS_repeat * 1000; \
|
||||
double __end2end__ = __tc__.Duration() / __repeat__ * 1000; \
|
||||
std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl; \
|
||||
__ss__ << "End2End(ms): " << __end2end__ << "ms." << std::endl; \
|
||||
} \
|
||||
if (FLAGS_collect_memory_info) { \
|
||||
if (__config_info__["collect_memory_info"] == "true") { \
|
||||
float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \
|
||||
float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \
|
||||
float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \
|
||||
@@ -74,5 +83,5 @@
|
||||
__resource_moniter__.Stop(); \
|
||||
} \
|
||||
fastdeploy::benchmark::ResultManager::SaveBenchmarkResult(__ss__.str(), \
|
||||
FLAGS_result_path); \
|
||||
__config_info__["result_path"]); \
|
||||
}
|
||||
|
@@ -19,81 +19,89 @@
|
||||
static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
|
||||
int argc, char* argv[], bool remove_flags) {
|
||||
google::ParseCommandLineFlags(&argc, &argv, remove_flags);
|
||||
if (FLAGS_profile_mode == "runtime") {
|
||||
option->EnableProfiling(FLAGS_include_h2d_d2h, FLAGS_repeat, FLAGS_warmup);
|
||||
option->DisableValidBackendCheck();
|
||||
std::unordered_map<std::string, std::string> config_info;
|
||||
fastdeploy::benchmark::ResultManager::LoadBenchmarkConfig(
|
||||
FLAGS_config_path, &config_info);
|
||||
if (config_info["profile_mode"] == "runtime") {
|
||||
option->EnableProfiling(config_info["include_h2d_d2h"] == "true",
|
||||
std::stoi(config_info["repeat"]),
|
||||
std::stoi(config_info["warmup"]));
|
||||
}
|
||||
if (FLAGS_device == "gpu") {
|
||||
option->UseGpu(FLAGS_device_id);
|
||||
if (FLAGS_backend == "ort") {
|
||||
if (config_info["device"] == "gpu") {
|
||||
option->UseGpu(std::stoi(config_info["device_id"]));
|
||||
if (config_info["backend"] == "ort") {
|
||||
option->UseOrtBackend();
|
||||
} else if (FLAGS_backend == "paddle") {
|
||||
} else if (config_info["backend"] == "paddle") {
|
||||
option->UsePaddleInferBackend();
|
||||
} else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
|
||||
} else if (config_info["backend"] == "trt" ||
|
||||
config_info["backend"] == "paddle_trt") {
|
||||
option->UseTrtBackend();
|
||||
if (FLAGS_backend == "paddle_trt") {
|
||||
if (config_info["backend"] == "paddle_trt") {
|
||||
option->UsePaddleInferBackend();
|
||||
option->paddle_infer_option.enable_trt = true;
|
||||
}
|
||||
if (FLAGS_use_fp16) {
|
||||
if (config_info["use_fp16"] == "true") {
|
||||
option->trt_option.enable_fp16 = true;
|
||||
}
|
||||
} else if (FLAGS_backend == "default") {
|
||||
} else if (config_info["backend"] == "default") {
|
||||
return true;
|
||||
} else {
|
||||
std::cout << "While inference with GPU, only support "
|
||||
"default/ort/paddle/trt/paddle_trt now, "
|
||||
<< FLAGS_backend << " is not supported." << std::endl;
|
||||
<< config_info["backend"] << " is not supported." << std::endl;
|
||||
PrintUsage();
|
||||
return false;
|
||||
}
|
||||
} else if (FLAGS_device == "cpu") {
|
||||
option->SetCpuThreadNum(FLAGS_cpu_thread_nums);
|
||||
if (FLAGS_backend == "ort") {
|
||||
} else if (config_info["device"] == "cpu") {
|
||||
option->SetCpuThreadNum(std::stoi(config_info["cpu_thread_nums"]));
|
||||
if (config_info["backend"] == "ort") {
|
||||
option->UseOrtBackend();
|
||||
} else if (FLAGS_backend == "ov") {
|
||||
} else if (config_info["backend"] == "ov") {
|
||||
option->UseOpenVINOBackend();
|
||||
} else if (FLAGS_backend == "paddle") {
|
||||
} else if (config_info["backend"] == "paddle") {
|
||||
option->UsePaddleInferBackend();
|
||||
} else if (FLAGS_backend == "lite") {
|
||||
} else if (config_info["backend"] == "lite") {
|
||||
option->UsePaddleLiteBackend();
|
||||
if (FLAGS_use_fp16) {
|
||||
if (config_info["use_fp16"] == "true") {
|
||||
option->paddle_lite_option.enable_fp16 = true;
|
||||
}
|
||||
} else if (FLAGS_backend == "default") {
|
||||
} else if (config_info["backend"] == "default") {
|
||||
return true;
|
||||
} else {
|
||||
std::cout << "While inference with CPU, only support "
|
||||
"default/ort/ov/paddle/lite now, "
|
||||
<< FLAGS_backend << " is not supported." << std::endl;
|
||||
<< config_info["backend"] << " is not supported." << std::endl;
|
||||
PrintUsage();
|
||||
return false;
|
||||
}
|
||||
} else if (FLAGS_device == "xpu") {
|
||||
option->UseKunlunXin(FLAGS_device_id);
|
||||
if (FLAGS_backend == "ort") {
|
||||
} else if (config_info["device"] == "xpu") {
|
||||
option->UseKunlunXin(std::stoi(config_info["device_id"]));
|
||||
if (config_info["backend"] == "ort") {
|
||||
option->UseOrtBackend();
|
||||
} else if (FLAGS_backend == "paddle") {
|
||||
} else if (config_info["backend"] == "paddle") {
|
||||
option->UsePaddleInferBackend();
|
||||
} else if (FLAGS_backend == "lite") {
|
||||
} else if (config_info["backend"] == "lite") {
|
||||
option->UsePaddleLiteBackend();
|
||||
if (FLAGS_use_fp16) {
|
||||
if (config_info["use_fp16"] == "true") {
|
||||
option->paddle_lite_option.enable_fp16 = true;
|
||||
}
|
||||
} else if (FLAGS_backend == "default") {
|
||||
} else if (config_info["backend"] == "default") {
|
||||
return true;
|
||||
} else {
|
||||
std::cout << "While inference with XPU, only support "
|
||||
"default/ort/paddle/lite now, "
|
||||
<< FLAGS_backend << " is not supported." << std::endl;
|
||||
<< config_info["backend"] << " is not supported." << std::endl;
|
||||
PrintUsage();
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device
|
||||
std::cerr << "Only support device CPU/GPU/XPU now, "
|
||||
<< config_info["device"]
|
||||
<< " is not supported." << std::endl;
|
||||
PrintUsage();
|
||||
return false;
|
||||
}
|
||||
PrintBenchmarkInfo();
|
||||
PrintBenchmarkInfo(config_info);
|
||||
return true;
|
||||
}
|
||||
|
49
fastdeploy/benchmark/utils.cc
Normal file → Executable file
49
fastdeploy/benchmark/utils.cc
Normal file → Executable file
@@ -249,7 +249,7 @@ bool ResultManager::SaveFDTensor(const FDTensor& tensor,
|
||||
|
||||
bool ResultManager::LoadFDTensor(FDTensor* tensor, const std::string& path) {
|
||||
if (!CheckFileExists(path)) {
|
||||
FDERROR << "Can't found file from" << path << std::endl;
|
||||
FDERROR << "Can't found file from " << path << std::endl;
|
||||
return false;
|
||||
}
|
||||
auto lines = ReadLines(path);
|
||||
@@ -365,6 +365,45 @@ void ResultManager::SaveBenchmarkResult(const std::string& res,
|
||||
fs.close();
|
||||
}
|
||||
|
||||
bool ResultManager::LoadBenchmarkConfig(
|
||||
const std::string& path,
|
||||
std::unordered_map<std::string, std::string>* config_info) {
|
||||
if (!CheckFileExists(path)) {
|
||||
FDERROR << "Can't found file from " << path << std::endl;
|
||||
return false;
|
||||
}
|
||||
auto lines = ReadLines(path);
|
||||
for (auto line : lines) {
|
||||
std::vector<std::string> tokens;
|
||||
Split(line, tokens, ':');
|
||||
(*config_info)[tokens[0]] = Strip(tokens[1], ' ');
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<std::vector<int32_t>> ResultManager::GetInputShapes(
|
||||
const std::string& raw_shapes) {
|
||||
std::vector<std::vector<int32_t>> shapes;
|
||||
std::vector<std::string> shape_tokens;
|
||||
Split(raw_shapes, shape_tokens, ':');
|
||||
for (auto str_shape : shape_tokens) {
|
||||
std::vector<int32_t> shape;
|
||||
std::string tmp_str = str_shape;
|
||||
while (!tmp_str.empty()) {
|
||||
int dim = atoi(tmp_str.data());
|
||||
shape.push_back(dim);
|
||||
size_t next_offset = tmp_str.find(",");
|
||||
if (next_offset == std::string::npos) {
|
||||
break;
|
||||
} else {
|
||||
tmp_str = tmp_str.substr(next_offset + 1);
|
||||
}
|
||||
}
|
||||
shapes.push_back(shape);
|
||||
}
|
||||
return shapes;
|
||||
}
|
||||
|
||||
#if defined(ENABLE_VISION)
|
||||
bool ResultManager::SaveDetectionResult(const vision::DetectionResult& res,
|
||||
const std::string& path) {
|
||||
@@ -520,7 +559,7 @@ bool ResultManager::SaveOCRDetResult(const std::vector<std::array<int, 8>>& res,
|
||||
bool ResultManager::LoadDetectionResult(vision::DetectionResult* res,
|
||||
const std::string& path) {
|
||||
if (!CheckFileExists(path)) {
|
||||
FDERROR << "Can't found file from" << path << std::endl;
|
||||
FDERROR << "Can't found file from " << path << std::endl;
|
||||
return false;
|
||||
}
|
||||
auto lines = ReadLines(path);
|
||||
@@ -553,7 +592,7 @@ bool ResultManager::LoadDetectionResult(vision::DetectionResult* res,
|
||||
bool ResultManager::LoadClassifyResult(vision::ClassifyResult* res,
|
||||
const std::string& path) {
|
||||
if (!CheckFileExists(path)) {
|
||||
FDERROR << "Can't found file from" << path << std::endl;
|
||||
FDERROR << "Can't found file from " << path << std::endl;
|
||||
return false;
|
||||
}
|
||||
auto lines = ReadLines(path);
|
||||
@@ -575,7 +614,7 @@ bool ResultManager::LoadClassifyResult(vision::ClassifyResult* res,
|
||||
bool ResultManager::LoadSegmentationResult(vision::SegmentationResult* res,
|
||||
const std::string& path) {
|
||||
if (!CheckFileExists(path)) {
|
||||
FDERROR << "Can't found file from" << path << std::endl;
|
||||
FDERROR << "Can't found file from " << path << std::endl;
|
||||
return false;
|
||||
}
|
||||
auto lines = ReadLines(path);
|
||||
@@ -602,7 +641,7 @@ bool ResultManager::LoadSegmentationResult(vision::SegmentationResult* res,
|
||||
bool ResultManager::LoadOCRDetResult(std::vector<std::array<int, 8>>* res,
|
||||
const std::string& path) {
|
||||
if (!CheckFileExists(path)) {
|
||||
FDERROR << "Can't found file from" << path << std::endl;
|
||||
FDERROR << "Can't found file from " << path << std::endl;
|
||||
return false;
|
||||
}
|
||||
auto lines = ReadLines(path);
|
||||
|
@@ -15,6 +15,7 @@
|
||||
|
||||
#include <memory>
|
||||
#include <thread> // NOLINT
|
||||
#include <unordered_map>
|
||||
#include "fastdeploy/utils/utils.h"
|
||||
#include "fastdeploy/core/fd_tensor.h"
|
||||
#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION)
|
||||
@@ -141,6 +142,12 @@ struct FASTDEPLOY_DECL ResultManager {
|
||||
/// Save Benchmark data
|
||||
static void SaveBenchmarkResult(const std::string& res,
|
||||
const std::string& path);
|
||||
/// Load Benchmark config
|
||||
static bool LoadBenchmarkConfig(const std::string& path,
|
||||
std::unordered_map<std::string, std::string>* config_info);
|
||||
/// Get Input Shapes
|
||||
static std::vector<std::vector<int32_t>> GetInputShapes(
|
||||
const std::string& raw_shapes);
|
||||
#if defined(ENABLE_VISION)
|
||||
/// Save & Load functions for basic results.
|
||||
static bool SaveDetectionResult(const vision::DetectionResult& res,
|
||||
|
Reference in New Issue
Block a user