diff --git a/benchmark/cpp/benchmark_xpu.sh b/benchmark/cpp/benchmark_xpu.sh index dc2a81c3d..96105610d 100755 --- a/benchmark/cpp/benchmark_xpu.sh +++ b/benchmark/cpp/benchmark_xpu.sh @@ -18,12 +18,12 @@ fi ./benchmark_picodet --model picodet_l_640_coco_lcnet_no_nms --image 000000014439.jpg --config_path $CONFIG_PATH --no_nms # PaddleSeg -./benchmark_ppseg --model Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH -./benchmark_ppseg --model PP_HumanSegV2_Lite_192x192_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH +./benchmark_ppseg --model Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH --xpu_l3_cache 0 +./benchmark_ppseg --model PP_HumanSegV2_Lite_192x192_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH --xpu_l3_cache 0 ./benchmark_ppseg --model PP_HumanSegV1_Lite_infer --image portrait_heng.jpg --config_path $CONFIG_PATH -./benchmark_ppseg --model Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH -./benchmark_ppseg --model PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH -./benchmark_ppseg --model SegFormer_B0-cityscapes-with-argmax --image cityscapes_demo.png --config_path $CONFIG_PATH +./benchmark_ppseg --model Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH --xpu_l3_cache 0 +./benchmark_ppseg --model PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH --xpu_l3_cache 0 +./benchmark_ppseg --model SegFormer_B0-cityscapes-with-argmax --image cityscapes_demo.png --config_path $CONFIG_PATH --xpu_l3_cache 0 ./benchmark_ppmatting --model PP-Matting-512 --image matting_input.jpg --config_path $CONFIG_PATH ./benchmark_ppmatting --model PPHumanMatting --image matting_input.jpg --config_path 
$CONFIG_PATH ./benchmark_ppmatting --model PPModnet_MobileNetV2 --image matting_input.jpg --config_path $CONFIG_PATH diff --git a/benchmark/cpp/config/config.xpu.lite.fp32.l3.e2e.txt b/benchmark/cpp/config/config.xpu.lite.fp32.l3.e2e.txt new file mode 100755 index 000000000..88a8ac111 --- /dev/null +++ b/benchmark/cpp/config/config.xpu.lite.fp32.l3.e2e.txt @@ -0,0 +1,14 @@ +device: xpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: lite +profile_mode: end2end +include_h2d_d2h: false +use_fp16: false +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 62914560 +result_path: benchmark_xpu_lite_fp32_l3_e2e.txt diff --git a/benchmark/cpp/config/config.xpu.lite.fp32.l3.txt b/benchmark/cpp/config/config.xpu.lite.fp32.l3.txt new file mode 100755 index 000000000..9fae9e8f0 --- /dev/null +++ b/benchmark/cpp/config/config.xpu.lite.fp32.l3.txt @@ -0,0 +1,14 @@ +device: xpu +device_id: 0 +cpu_thread_nums: 1 +warmup: 20 +repeat: 100 +backend: lite +profile_mode: runtime +include_h2d_d2h: false +use_fp16: false +collect_memory_info: false +sampling_interval: 1 +precision_compare: false +xpu_l3_cache: 62914560 +result_path: benchmark_xpu_lite_fp32_l3.txt diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h index b3e773f6a..b3b77e825 100755 --- a/benchmark/cpp/flags.h +++ b/benchmark/cpp/flags.h @@ -32,6 +32,7 @@ DEFINE_string(image, "", "Path of the image file."); DEFINE_string(config_path, "config.txt", "Path of benchmark config."); DEFINE_int32(warmup, -1, "Number of warmup for profiling."); DEFINE_int32(repeat, -1, "Number of repeats for profiling."); +DEFINE_int32(xpu_l3_cache, -1, "Size of xpu l3 cache for profiling; a negative value (default) uses xpu_l3_cache from the config file."); static void PrintUsage() { std::cout << "Usage: infer_demo --model model_path --image img_path " diff --git a/benchmark/cpp/get_models.sh b/benchmark/cpp/get_models.sh index 52fd5f676..8f297b17c 100755 --- a/benchmark/cpp/get_models.sh +++ b/benchmark/cpp/get_models.sh @@ -1,43 +1,93 @@ 
#!/bin/bash -set -e +set +e set +x download_fd_model_zxvf() { - local model="$1" + local model="$1" # xxx_model.tgz + local len=${#model} + local model_dir=${model:0:${#model}-4} # xxx_model + if [ -d "${model_dir}" ]; then + echo "[INFO] --- $model_dir already exists!" + return + fi if [ ! -f "${model}" ]; then echo "[INFO] --- downloading $model" wget https://bj.bcebos.com/paddlehub/fastdeploy/$model && tar -zxvf $model + # remove stray ._* files unpacked by tar + rm -f "./${model_dir}"/._* else echo "[INFO] --- $model already exists!" + if [ ! -d "${model_dir}" ]; then + tar -zxvf $model + rm -f "./${model_dir}"/._* + else + echo "[INFO] --- $model_dir already exists!" + fi fi } download_fd_model_xvf() { local model="$1" + local model_dir=${model:0:${#model}-4} # xxx_model + if [ -d "${model_dir}" ]; then + echo "[INFO] --- $model_dir already exists!" + return + fi if [ ! -f "${model}" ]; then echo "[INFO] --- downloading $model" wget https://bj.bcebos.com/paddlehub/fastdeploy/$model && tar -xvf $model + rm -f "./${model_dir}"/._* else echo "[INFO] --- $model already exists!" + if [ ! -d "${model_dir}" ]; then + tar -xvf $model + rm -f "./${model_dir}"/._* + else + echo "[INFO] --- $model_dir already exists!" + fi fi } download_common_model_zxvf() { local model_url="$1" local model="$2" + local model_dir=${model:0:${#model}-4} # xxx_model + if [ -d "${model_dir}" ]; then + echo "[INFO] --- $model_dir already exists!" + return + fi if [ ! -f "${model}" ]; then echo "[INFO] --- downloading $model" wget ${model_url} && tar -zxvf $model + rm -f "./${model_dir}"/._* else echo "[INFO] --- $model already exists!" + if [ ! -d "${model_dir}" ]; then + tar -zxvf $model + rm -f "./${model_dir}"/._* + else + echo "[INFO] --- $model_dir already exists!" + fi fi } download_common_model_xvf() { local model_url="$1" local model="$2" + local model_dir=${model:0:${#model}-4} # xxx_model + if [ -d "${model_dir}" ]; then + echo "[INFO] --- $model_dir already exists!" + return + fi if [ ! 
-f "${model}" ]; then echo "[INFO] --- downloading $model" wget ${model_url} && tar -xvf $model + rm -f "./${model_dir}"/._* else echo "[INFO] --- $model already exists!" + if [ ! -d "${model_dir}" ]; then + tar -xvf $model + rm -f "./${model_dir}"/._* + else + echo "[INFO] --- $model_dir already exists!" + fi fi } download_common_file() { @@ -60,6 +110,7 @@ download_fd_model_zxvf yolov5_s_300e_coco_no_nms.tgz download_fd_model_zxvf yolov6_s_300e_coco_no_nms.tgz download_fd_model_zxvf yolov7_l_300e_coco_no_nms.tgz download_fd_model_zxvf yolov8_s_500e_coco_no_nms.tgz + # PaddleClas download_fd_model_zxvf PPLCNet_x1_0_infer.tgz download_fd_model_zxvf PPLCNetV2_base_infer.tgz @@ -90,6 +141,7 @@ download_fd_model_zxvf DenseNet121_infer.tgz download_fd_model_zxvf PPHGNet_small_infer.tgz download_fd_model_zxvf person_exists_infer.tgz download_fd_model_zxvf EfficientNetB0_small_infer.tgz + # PaddleSeg download_fd_model_zxvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz download_fd_model_zxvf PP_HumanSegV1_Lite_infer.tgz @@ -100,12 +152,14 @@ download_fd_model_zxvf SegFormer_B0-cityscapes-with-argmax.tgz download_fd_model_xvf PP-Matting-512.tgz download_fd_model_xvf PPHumanMatting.tgz download_fd_model_xvf PPModnet_MobileNetV2.tgz + # PaddleOCR download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar ch_PP-OCRv3_det_infer.tar download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar ch_PP-OCRv3_rec_infer.tar download_common_model_xvf https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar ch_ppocr_mobile_v2.0_cls_infer.tar download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar ch_PP-OCRv2_det_infer.tar download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar ch_PP-OCRv2_rec_infer.tar + # download images download_common_file 
https://bj.bcebos.com/paddlehub/fastdeploy/rec_img.jpg rec_img.jpg download_common_file https://bj.bcebos.com/paddlehub/fastdeploy/cityscapes_demo.png cityscapes_demo.png diff --git a/benchmark/cpp/option.h b/benchmark/cpp/option.h index f62863b4b..329a630dc 100755 --- a/benchmark/cpp/option.h +++ b/benchmark/cpp/option.h @@ -85,8 +85,13 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option, return false; } } else if (config_info["device"] == "xpu") { - option->UseKunlunXin(std::stoi(config_info["device_id"]), - std::stoi(config_info["xpu_l3_cache"])); + if (FLAGS_xpu_l3_cache >= 0) { /* a non-negative --xpu_l3_cache flag overrides the config-file value */ + option->UseKunlunXin(std::stoi(config_info["device_id"]), + FLAGS_xpu_l3_cache); + } else { + option->UseKunlunXin(std::stoi(config_info["device_id"]), + std::stoi(config_info["xpu_l3_cache"])); + } if (config_info["backend"] == "ort") { option->UseOrtBackend(); } else if (config_info["backend"] == "paddle") { diff --git a/cmake/kunlunxin.cmake b/cmake/kunlunxin.cmake index e3fba52cb..1b335be80 100644 --- a/cmake/kunlunxin.cmake +++ b/cmake/kunlunxin.cmake @@ -10,8 +10,8 @@ endif() option(WITH_LITE_XPU_LOG "" ON) if(NOT PADDLELITE_URL) if (WITH_LITE_XPU_LOG) - set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz") - # set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20230303.tgz") + #set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz") + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20230303.tgz") else() set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-without-log-20230303.tgz") endif() diff --git a/fastdeploy/runtime/backends/lite/configure_hardware.cc b/fastdeploy/runtime/backends/lite/configure_hardware.cc index cf8a958fe..28d22d2db 100644 --- a/fastdeploy/runtime/backends/lite/configure_hardware.cc +++ b/fastdeploy/runtime/backends/lite/configure_hardware.cc @@ -66,6 +66,7 @@ 
void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) { option.kunlunxin_l3_workspace_size); config_.set_xpu_l3_cache_method(option.kunlunxin_l3_workspace_size, option.kunlunxin_locked); + config_.set_xpu_l3_cache_autotune(option.kunlunxin_autotune); config_.set_xpu_conv_autotune(option.kunlunxin_autotune, option.kunlunxin_autotune_file); config_.set_xpu_multi_encoder_method(option.kunlunxin_precision,