# Benchmark run configuration: gpu device, paddle backend, use_fp16 disabled.
# NOTE(review): key meanings below are inferred from the key names only;
# confirm against the tool that loads this file. Also confirm that the loader
# accepts '#' comment lines before merging these annotations.
#
# Target device and which device index to use.
# NOTE(review): device_id 3 presumably selects the fourth GPU (0-based) - confirm
# that this index exists on the machine running the benchmark.
device: gpu
device_id: 3
# Presumably the CPU thread count; may be irrelevant while device is gpu - TODO confirm.
cpu_thread_nums: 1
# Presumably 200 untimed warm-up iterations followed by 1000 measured
# iterations - TODO confirm how the consumer uses these two counts.
warmup: 200
repeat: 1000
# Backend name and profiling mode; the set of accepted values is not visible here.
backend: paddle
profile_mode: runtime
# Presumably controls whether host<->device copy time is counted - TODO confirm.
include_h2d_d2h: false
# FP16 is off, which is consistent with the "fp32" suffix in result_path below.
use_fp16: false
# Memory-info collection is disabled. sampling_interval presumably belongs to
# that collection; its unit is not visible in this file - TODO confirm.
collect_memory_info: false
sampling_interval: 1
# Precision comparison is disabled for this run.
precision_compare: false
# Presumably an XPU-only setting with no effect while device is gpu - TODO confirm.
xpu_l3_cache: 0
# Presumably the file the results are written to; the name mirrors the
# device (gpu), backend (paddle) and precision (fp32) chosen above.
result_path: benchmark_gpu_paddle_fp32.txt