[Benchmark] Add End2End and meminfo configs (#1645)

* add GPL license

* add GPL-3.0 license

* add GPL-3.0 license

* add GPL-3.0 license

* support yolov8

* add pybind for yolov8

* add yolov8 readme

* add cpp benchmark

* add cpu and gpu mem

* public part split

* add runtime mode

* fixed bugs

* add cpu_thread_nums

* deal with comments

* deal with comments

* deal with comments

* rm useless code

* add FASTDEPLOY_DECL

* add FASTDEPLOY_DECL

* fixed for windows

* mv rss to pss

* mv rss to pss

* Update utils.cc

* use thread to collect mem

* Add ResourceUsageMonitor

* rm useless code

* fixed bug

* fixed typo

* update ResourceUsageMonitor

* fixed bug

* fixed bug

* add note for ResourceUsageMonitor

* deal with comments

* add macros

* deal with comments

* deal with comments

* deal with comments

* re-lint

* rm pmap and use mem api

* rm pmap and use mem api

* add mem api

* Add PrintBenchmarkInfo func

* Add PrintBenchmarkInfo func

* Add PrintBenchmarkInfo func

* deal with comments

* fixed enable_paddle_to_trt

* add log for paddle_trt

* support ppcls benchmark

* use new trt option api

* update benchmark info

* simplify benchmark.cc

* simplify benchmark.cc

* deal with comments

* Add ppseg && ppocr benchmark

* add OCR rec img

* add ocr benchmark

* fixed trt shape

* add trt shape

* resolve conflict

* add ENABLE_BENCHMARK define

* Add ClassifyDiff

* Add Resize for ClassifyResult

* deal with comments

* add convert info script

* resolve conflict

* Add SaveBenchmarkResult func

* fixed bug

* fixed bug

* fixed bug

* add config.txt for option

* fixed bug

* fixed bug

* fixed bug

* add benchmark.sh

* mv thread_nums from 8 to 1

* deal with comments

* deal with comments

* fixed readme

* deal with comments

* add all platform shell

* Update config.arm.txt

* Update config.gpu.txt

* Update config.x86.txt

* fixed printinfo bug

* rm proxy

* add more model support

* all backend config.txt

* deal with comments

* Add MattingDiff compare

* fixed predict bug

* adjust warmup/repeat times

* add e2e/mem configs

* fixed typo

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
Author: WJJ1995
Date: 2023-03-17 17:30:47 +08:00
Committed by: GitHub
Parent: 95820a9892
Commit: 1b37cb0c03
32 changed files with 343 additions and 8 deletions
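
The commit log above also covers the tooling that consumes these configs: a ResourceUsageMonitor that collects CPU/GPU memory from a separate thread at a configurable sampling interval, plus PrintBenchmarkInfo and SaveBenchmarkResult helpers for reporting. As a rough sketch of the background-sampling idea only (hypothetical names, Linux-only /proc reading, and not FastDeploy's actual ResourceUsageMonitor API), such a monitor might look like this:

```cpp
// Illustrative sketch of a thread-based memory sampler, in the spirit of the
// ResourceUsageMonitor mentioned in the commit log. All names here are
// hypothetical and the /proc/self/status read is Linux-only.
#include <atomic>
#include <chrono>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <thread>

class MemSampler {
 public:
  explicit MemSampler(int interval_ms) : interval_ms_(interval_ms) {}

  void Start() {
    running_ = true;
    worker_ = std::thread([this] {
      while (running_) {
        long cur = CurrentRssKb();
        if (cur > peak_rss_kb_) peak_rss_kb_ = cur;  // track the peak only
        std::this_thread::sleep_for(std::chrono::milliseconds(interval_ms_));
      }
    });
  }

  void Stop() {
    running_ = false;
    if (worker_.joinable()) worker_.join();
  }

  long PeakRssKb() const { return peak_rss_kb_; }

 private:
  // Reads the resident set size (VmRSS) of the current process, in KB.
  static long CurrentRssKb() {
    std::ifstream status("/proc/self/status");
    std::string line;
    while (std::getline(status, line)) {
      if (line.rfind("VmRSS:", 0) == 0) {
        std::istringstream iss(line.substr(6));
        long kb = 0;
        iss >> kb;
        return kb;
      }
    }
    return 0;
  }

  int interval_ms_;
  std::atomic<bool> running_{false};
  std::atomic<long> peak_rss_kb_{0};
  std::thread worker_;
};

int main() {
  MemSampler sampler(/*interval_ms=*/50);
  sampler.Start();
  // ... run the warmup/repeat inference loop here ...
  sampler.Stop();
  std::cout << "peak cpu_rss_kb: " << sampler.PeakRssKb() << std::endl;
  return 0;
}
```

The configs below then only need to toggle collect_memory_info and set sampling_interval; presumably the benchmark binary starts such a monitor around the timed loop and appends the peak values to result_path.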


@@ -0,0 +1,14 @@
device: cpu
device_id: 0
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: lite
profile_mode: end2end
include_h2d_d2h: false
use_fp16: true
collect_memory_info: true
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_arm_lite_fp16_e2e_mem.txt
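
This config (and each of the files below) uses the same flat key: value layout: device/device_id pick the hardware, cpu_thread_nums sets CPU threads, warmup/repeat the iteration counts, backend the inference engine (lite, ort, ov, paddle, paddle_trt, trt), profile_mode selects end-to-end vs. runtime-only timing, include_h2d_d2h presumably controls whether host/device copies are counted, use_fp16 the precision, collect_memory_info and sampling_interval the memory sampling, precision_compare the numerical diff check, xpu_l3_cache the XPU L3 cache size, and result_path the output file. A minimal sketch of reading such a file into a map (a hypothetical helper with a placeholder filename, not the option parser this PR ships) could be:

```cpp
// Minimal sketch: load a flat "key: value" benchmark config into a string map.
// The helper name and the config filename are placeholders for illustration.
#include <fstream>
#include <iostream>
#include <string>
#include <unordered_map>

std::unordered_map<std::string, std::string> LoadBenchmarkConfig(
    const std::string& path) {
  std::unordered_map<std::string, std::string> config;
  std::ifstream file(path);
  std::string line;
  while (std::getline(file, line)) {
    auto pos = line.find(':');
    if (pos == std::string::npos) continue;  // skip blank/malformed lines
    std::string key = line.substr(0, pos);
    std::string value = line.substr(pos + 1);
    if (!value.empty() && value.front() == ' ') value.erase(0, 1);  // drop the space after ':'
    config[key] = value;
  }
  return config;
}

int main() {
  auto cfg = LoadBenchmarkConfig("config.txt");  // placeholder path
  std::cout << "backend: " << cfg["backend"] << ", device: " << cfg["device"]
            << ", warmup: " << cfg["warmup"] << ", repeat: " << cfg["repeat"]
            << std::endl;
  return 0;
}
```

Note that the *_e2e_mem variants use shorter warmup/repeat counts (10/20) than the plain *_e2e ones (20/100).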


@@ -0,0 +1,14 @@
device: cpu
device_id: 0
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: lite
profile_mode: end2end
include_h2d_d2h: false
use_fp16: true
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_arm_lite_fp16_e2e.txt


@@ -0,0 +1,14 @@
device: cpu
device_id: 0
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: lite
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: true
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_arm_lite_fp32_e2e_mem.txt


@@ -0,0 +1,14 @@
device: cpu
device_id: 0
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: lite
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_arm_lite_fp32_e2e.txt


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: ort
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_ort_fp32_e2e_mem.txt


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: ort
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_ort_fp32_e2e.txt


@@ -1,5 +1,5 @@
 device: gpu
-device_id: 0
+device_id: 3
 cpu_thread_nums: 1
 warmup: 20
 repeat: 100


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: paddle
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_paddle_fp32_e2e_mem.txt


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: paddle
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_paddle_fp32_e2e.txt


@@ -1,5 +1,5 @@
 device: gpu
-device_id: 0
+device_id: 3
 cpu_thread_nums: 1
 warmup: 20
 repeat: 100


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: paddle_trt
profile_mode: end2end
include_h2d_d2h: false
use_fp16: true
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_paddle_trt_fp16_e2e_mem.txt


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: paddle_trt
profile_mode: end2end
include_h2d_d2h: false
use_fp16: true
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_paddle_trt_fp16_e2e.txt


@@ -1,5 +1,5 @@
 device: gpu
-device_id: 0
+device_id: 3
 cpu_thread_nums: 1
 warmup: 20
 repeat: 100


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: paddle_trt
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_paddle_trt_fp32_e2e_mem.txt


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: paddle_trt
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_paddle_trt_fp32_e2e.txt


@@ -1,5 +1,5 @@
 device: gpu
-device_id: 0
+device_id: 3
 cpu_thread_nums: 1
 warmup: 20
 repeat: 100


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: trt
profile_mode: end2end
include_h2d_d2h: false
use_fp16: true
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_trt_fp16_e2e_mem.txt


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: trt
profile_mode: end2end
include_h2d_d2h: false
use_fp16: true
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_trt_fp16_e2e.txt


@@ -1,5 +1,5 @@
 device: gpu
-device_id: 0
+device_id: 3
 cpu_thread_nums: 1
 warmup: 20
 repeat: 100


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: trt
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_trt_fp32_e2e_mem.txt


@@ -0,0 +1,14 @@
device: gpu
device_id: 3
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: trt
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_gpu_trt_fp32_e2e.txt


@@ -1,5 +1,5 @@
 device: gpu
-device_id: 0
+device_id: 3
 cpu_thread_nums: 1
 warmup: 20
 repeat: 100


@@ -0,0 +1,14 @@
device: cpu
device_id: 0
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: ort
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_x86_ort_fp32_e2e_mem.txt


@@ -0,0 +1,14 @@
device: cpu
device_id: 0
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: ort
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_x86_ort_fp32_e2e.txt


@@ -0,0 +1,14 @@
device: cpu
device_id: 0
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: ov
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_x86_ov_fp32_e2e_mem.txt


@@ -0,0 +1,14 @@
device: cpu
device_id: 0
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: ov
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_x86_ov_fp32_e2e.txt


@@ -0,0 +1,14 @@
device: cpu
device_id: 0
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: paddle
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_x86_paddle_fp32_e2e_mem.txt


@@ -0,0 +1,14 @@
device: cpu
device_id: 0
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: paddle
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_x86_paddle_fp32_e2e.txt


@@ -0,0 +1,14 @@
device: xpu
device_id: 0
cpu_thread_nums: 1
warmup: 10
repeat: 20
backend: lite
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_xpu_lite_fp32_e2e_mem.txt


@@ -0,0 +1,14 @@
device: xpu
device_id: 0
cpu_thread_nums: 1
warmup: 20
repeat: 100
backend: lite
profile_mode: end2end
include_h2d_d2h: false
use_fp16: false
collect_memory_info: false
sampling_interval: 1
precision_compare: false
xpu_l3_cache: 0
result_path: benchmark_xpu_lite_fp32_e2e.txt


@@ -79,7 +79,7 @@
 std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \
 __ss__ << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \
 std::cout << "gpu_util: " << __gpu_util__ << std::endl; \
-__ss__ << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \
+__ss__ << "gpu_util: " << __gpu_util__ << "MB." << std::endl; \
 __resource_moniter__.Stop(); \
 } \
 fastdeploy::benchmark::ResultManager::SaveBenchmarkResult(__ss__.str(), \
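
The hunk above appears to fix a duplicated write to the result buffer __ss__ (gpu_rss_mb written twice, with gpu_util missing from the output that SaveBenchmarkResult persists). Each metric is logged once to stdout and once to the buffer; as an aside (illustrative only, not what the PR does), a small helper could emit both sinks in one call and make that kind of copy-paste slip harder to reproduce:

```cpp
// Illustrative only: write one metric line to both stdout and the result
// buffer in a single call. Names are hypothetical; the macros above keep the
// explicit per-line style.
#include <iostream>
#include <sstream>
#include <string>

template <typename T>
void LogMetric(std::ostringstream& ss, const std::string& name, const T& value,
               const std::string& suffix = "") {
  std::cout << name << ": " << value << suffix << std::endl;
  ss << name << ": " << value << suffix << std::endl;
}

// e.g. LogMetric(ss, "gpu_rss_mb", gpu_mem, "MB.");
//      LogMetric(ss, "gpu_util", gpu_util);
```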


@@ -134,7 +134,6 @@ else()
 endif(PADDLEINFERENCE_DIRECTORY)
 if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
-message("?????????????? ${PATCHELF_EXE}")
 add_custom_target(patchelf_paddle_inference ALL COMMAND bash -c "PATCHELF_EXE=${PATCHELF_EXE} python ${PROJECT_SOURCE_DIR}/scripts/patch_paddle_inference.py ${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so" DEPENDS ${LIBRARY_NAME})
 endif()