diff --git a/benchmark/cpp/config/config.arm.lite.fp16.e2e.mem.txt b/benchmark/cpp/config/config.arm.lite.fp16.e2e.mem.txt new file mode 100755 index 000000000..8fb379488 --- /dev/null +++ b/benchmark/cpp/config/config.arm.lite.fp16.e2e.mem.txt @@ -0,0 +1,14 @@ +device: cpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: lite +profile_mode: end2end +include_h2d_d2h: false +use_fp16: true +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_arm_lite_fp16_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.arm.lite.fp16.e2e.txt b/benchmark/cpp/config/config.arm.lite.fp16.e2e.txt new file mode 100755 index 000000000..87b1a7f79 --- /dev/null +++ b/benchmark/cpp/config/config.arm.lite.fp16.e2e.txt @@ -0,0 +1,14 @@ +device: cpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: lite +profile_mode: end2end +include_h2d_d2h: false +use_fp16: true +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_arm_lite_fp16_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.arm.lite.fp32.e2e.mem.txt b/benchmark/cpp/config/config.arm.lite.fp32.e2e.mem.txt new file mode 100755 index 000000000..9fb93fb27 --- /dev/null +++ b/benchmark/cpp/config/config.arm.lite.fp32.e2e.mem.txt @@ -0,0 +1,14 @@ +device: cpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: lite +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_arm_lite_fp32_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.arm.lite.fp32.e2e.txt b/benchmark/cpp/config/config.arm.lite.fp32.e2e.txt new file mode 100755 index 000000000..2fad23322 --- /dev/null +++ b/benchmark/cpp/config/config.arm.lite.fp32.e2e.txt @@ -0,0 +1,14 @@ +device: cpu 
+device_id: 0 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: lite +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_arm_lite_fp32_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.ort.fp32.e2e.mem.txt b/benchmark/cpp/config/config.gpu.ort.fp32.e2e.mem.txt new file mode 100755 index 000000000..b95718160 --- /dev/null +++ b/benchmark/cpp/config/config.gpu.ort.fp32.e2e.mem.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: ort +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_ort_fp32_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.ort.fp32.e2e.txt b/benchmark/cpp/config/config.gpu.ort.fp32.e2e.txt new file mode 100755 index 000000000..a1bf97be9 --- /dev/null +++ b/benchmark/cpp/config/config.gpu.ort.fp32.e2e.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: ort +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_ort_fp32_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.ort.fp32.txt b/benchmark/cpp/config/config.gpu.ort.fp32.txt index 3f48bde01..cb1d2b5b4 100755 --- a/benchmark/cpp/config/config.gpu.ort.fp32.txt +++ b/benchmark/cpp/config/config.gpu.ort.fp32.txt @@ -1,5 +1,5 @@ device: gpu -device_id: 0 +device_id: 3 cpu_thread_nums: 1 warmup: 20 repeat: 100 diff --git a/benchmark/cpp/config/config.gpu.paddle.fp32.e2e.mem.txt b/benchmark/cpp/config/config.gpu.paddle.fp32.e2e.mem.txt new file mode 100755 index 000000000..d06642b1e --- /dev/null +++ 
b/benchmark/cpp/config/config.gpu.paddle.fp32.e2e.mem.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: paddle +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_paddle_fp32_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.paddle.fp32.e2e.txt b/benchmark/cpp/config/config.gpu.paddle.fp32.e2e.txt new file mode 100755 index 000000000..1e3dabc59 --- /dev/null +++ b/benchmark/cpp/config/config.gpu.paddle.fp32.e2e.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: paddle +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_paddle_fp32_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.paddle.fp32.txt b/benchmark/cpp/config/config.gpu.paddle.fp32.txt index ba06c5c5d..cbe7bc2cb 100755 --- a/benchmark/cpp/config/config.gpu.paddle.fp32.txt +++ b/benchmark/cpp/config/config.gpu.paddle.fp32.txt @@ -1,5 +1,5 @@ device: gpu -device_id: 0 +device_id: 3 cpu_thread_nums: 1 warmup: 20 repeat: 100 diff --git a/benchmark/cpp/config/config.gpu.paddle_trt.fp16.e2e.mem.txt b/benchmark/cpp/config/config.gpu.paddle_trt.fp16.e2e.mem.txt new file mode 100755 index 000000000..8063c45f4 --- /dev/null +++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp16.e2e.mem.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: paddle_trt +profile_mode: end2end +include_h2d_d2h: false +use_fp16: true +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_paddle_trt_fp16_e2e_mem.txt \ No newline at end of file diff --git 
a/benchmark/cpp/config/config.gpu.paddle_trt.fp16.e2e.txt b/benchmark/cpp/config/config.gpu.paddle_trt.fp16.e2e.txt new file mode 100755 index 000000000..525d8c3e9 --- /dev/null +++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp16.e2e.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: paddle_trt +profile_mode: end2end +include_h2d_d2h: false +use_fp16: true +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_paddle_trt_fp16_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.paddle_trt.fp16.txt b/benchmark/cpp/config/config.gpu.paddle_trt.fp16.txt index f731ce7fc..6cdd4f925 100755 --- a/benchmark/cpp/config/config.gpu.paddle_trt.fp16.txt +++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp16.txt @@ -1,5 +1,5 @@ device: gpu -device_id: 0 +device_id: 3 cpu_thread_nums: 1 warmup: 20 repeat: 100 diff --git a/benchmark/cpp/config/config.gpu.paddle_trt.fp32.e2e.mem.txt b/benchmark/cpp/config/config.gpu.paddle_trt.fp32.e2e.mem.txt new file mode 100755 index 000000000..4f4fb67ac --- /dev/null +++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp32.e2e.mem.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: paddle_trt +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_paddle_trt_fp32_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.paddle_trt.fp32.e2e.txt b/benchmark/cpp/config/config.gpu.paddle_trt.fp32.e2e.txt new file mode 100755 index 000000000..58fe7d8a3 --- /dev/null +++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp32.e2e.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: paddle_trt +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false 
+collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_paddle_trt_fp32_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.paddle_trt.fp32.txt b/benchmark/cpp/config/config.gpu.paddle_trt.fp32.txt index a11394eb7..7a83160cd 100755 --- a/benchmark/cpp/config/config.gpu.paddle_trt.fp32.txt +++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp32.txt @@ -1,5 +1,5 @@ device: gpu -device_id: 0 +device_id: 3 cpu_thread_nums: 1 warmup: 20 repeat: 100 diff --git a/benchmark/cpp/config/config.gpu.trt.fp16.e2e.mem.txt b/benchmark/cpp/config/config.gpu.trt.fp16.e2e.mem.txt new file mode 100755 index 000000000..9819ba74e --- /dev/null +++ b/benchmark/cpp/config/config.gpu.trt.fp16.e2e.mem.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: trt +profile_mode: end2end +include_h2d_d2h: false +use_fp16: true +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_trt_fp16_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.trt.fp16.e2e.txt b/benchmark/cpp/config/config.gpu.trt.fp16.e2e.txt new file mode 100755 index 000000000..742129f65 --- /dev/null +++ b/benchmark/cpp/config/config.gpu.trt.fp16.e2e.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: trt +profile_mode: end2end +include_h2d_d2h: false +use_fp16: true +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_trt_fp16_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.trt.fp16.txt b/benchmark/cpp/config/config.gpu.trt.fp16.txt index e27837e22..4a842a69c 100755 --- a/benchmark/cpp/config/config.gpu.trt.fp16.txt +++ b/benchmark/cpp/config/config.gpu.trt.fp16.txt @@ -1,5 +1,5 @@ device: gpu -device_id: 0 +device_id: 3 cpu_thread_nums: 1 
warmup: 20 repeat: 100 diff --git a/benchmark/cpp/config/config.gpu.trt.fp32.e2e.mem.txt b/benchmark/cpp/config/config.gpu.trt.fp32.e2e.mem.txt new file mode 100755 index 000000000..993a4309e --- /dev/null +++ b/benchmark/cpp/config/config.gpu.trt.fp32.e2e.mem.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: trt +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_trt_fp32_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.trt.fp32.e2e.txt b/benchmark/cpp/config/config.gpu.trt.fp32.e2e.txt new file mode 100755 index 000000000..1130f3482 --- /dev/null +++ b/benchmark/cpp/config/config.gpu.trt.fp32.e2e.txt @@ -0,0 +1,14 @@ +device: gpu +device_id: 3 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: trt +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_gpu_trt_fp32_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.gpu.trt.fp32.txt b/benchmark/cpp/config/config.gpu.trt.fp32.txt index 49c2695c1..c38dfcfd5 100755 --- a/benchmark/cpp/config/config.gpu.trt.fp32.txt +++ b/benchmark/cpp/config/config.gpu.trt.fp32.txt @@ -1,5 +1,5 @@ device: gpu -device_id: 0 +device_id: 3 cpu_thread_nums: 1 warmup: 20 repeat: 100 diff --git a/benchmark/cpp/config/config.x86.ort.fp32.e2e.mem.txt b/benchmark/cpp/config/config.x86.ort.fp32.e2e.mem.txt new file mode 100755 index 000000000..497034564 --- /dev/null +++ b/benchmark/cpp/config/config.x86.ort.fp32.e2e.mem.txt @@ -0,0 +1,14 @@ +device: cpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: ort +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: true +sampling_interval: 1 +precision_compare: false 
+xpu_l3_cache: 0 +result_path: benchmark_x86_ort_fp32_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.x86.ort.fp32.e2e.txt b/benchmark/cpp/config/config.x86.ort.fp32.e2e.txt new file mode 100755 index 000000000..17803cbf7 --- /dev/null +++ b/benchmark/cpp/config/config.x86.ort.fp32.e2e.txt @@ -0,0 +1,14 @@ +device: cpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: ort +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_x86_ort_fp32_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.x86.ov.fp32.e2e.mem.txt b/benchmark/cpp/config/config.x86.ov.fp32.e2e.mem.txt new file mode 100755 index 000000000..0717bcb20 --- /dev/null +++ b/benchmark/cpp/config/config.x86.ov.fp32.e2e.mem.txt @@ -0,0 +1,14 @@ +device: cpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: ov +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_x86_ov_fp32_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.x86.ov.fp32.e2e.txt b/benchmark/cpp/config/config.x86.ov.fp32.e2e.txt new file mode 100755 index 000000000..cdcb8d0ad --- /dev/null +++ b/benchmark/cpp/config/config.x86.ov.fp32.e2e.txt @@ -0,0 +1,14 @@ +device: cpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: ov +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_x86_ov_fp32_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.x86.paddle.fp32.e2e.mem.txt b/benchmark/cpp/config/config.x86.paddle.fp32.e2e.mem.txt new file mode 100755 index 000000000..d36413d9a --- /dev/null +++ 
b/benchmark/cpp/config/config.x86.paddle.fp32.e2e.mem.txt @@ -0,0 +1,14 @@ +device: cpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: paddle +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_x86_paddle_fp32_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.x86.paddle.fp32.e2e.txt b/benchmark/cpp/config/config.x86.paddle.fp32.e2e.txt new file mode 100755 index 000000000..b255d7378 --- /dev/null +++ b/benchmark/cpp/config/config.x86.paddle.fp32.e2e.txt @@ -0,0 +1,14 @@ +device: cpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: paddle +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_x86_paddle_fp32_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.xpu.lite.fp32.e2e.mem.txt b/benchmark/cpp/config/config.xpu.lite.fp32.e2e.mem.txt new file mode 100755 index 000000000..6a68958b2 --- /dev/null +++ b/benchmark/cpp/config/config.xpu.lite.fp32.e2e.mem.txt @@ -0,0 +1,14 @@ +device: xpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 10 +repeat: 20 +backend: lite +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: true +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_xpu_lite_fp32_e2e_mem.txt \ No newline at end of file diff --git a/benchmark/cpp/config/config.xpu.lite.fp32.e2e.txt b/benchmark/cpp/config/config.xpu.lite.fp32.e2e.txt new file mode 100755 index 000000000..6ae4cf2f4 --- /dev/null +++ b/benchmark/cpp/config/config.xpu.lite.fp32.e2e.txt @@ -0,0 +1,14 @@ +device: xpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: lite +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: false 
+sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 0 +result_path: benchmark_xpu_lite_fp32_e2e.txt \ No newline at end of file diff --git a/benchmark/cpp/macros.h b/benchmark/cpp/macros.h index 021e7db2e..018a7c881 100755 --- a/benchmark/cpp/macros.h +++ b/benchmark/cpp/macros.h @@ -79,7 +79,7 @@ std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \ __ss__ << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \ std::cout << "gpu_util: " << __gpu_util__ << std::endl; \ - __ss__ << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \ + __ss__ << "gpu_util: " << __gpu_util__ << std::endl; \ __resource_moniter__.Stop(); \ } \ fastdeploy::benchmark::ResultManager::SaveBenchmarkResult(__ss__.str(), \ diff --git a/cmake/paddle_inference.cmake b/cmake/paddle_inference.cmake index 5c125fcc5..fc9cbd8e1 100755 --- a/cmake/paddle_inference.cmake +++ b/cmake/paddle_inference.cmake @@ -134,7 +134,6 @@ else() endif(PADDLEINFERENCE_DIRECTORY) if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) - message("?????????????? ${PATCHELF_EXE}") add_custom_target(patchelf_paddle_inference ALL COMMAND bash -c "PATCHELF_EXE=${PATCHELF_EXE} python ${PROJECT_SOURCE_DIR}/scripts/patch_paddle_inference.py ${PADDLEINFERENCE_INSTALL_DIR}/paddle/lib/libpaddle_inference.so" DEPENDS ${LIBRARY_NAME}) endif()