[XPU] Update Lite XPU lib and fixed some bugs (#1647)

2025-10-05 08:37:06 +08:00 · 2023-03-17 21:39:39 +08:00
parent e21127b7e8
commit 6af3691568
8 changed files with 100 additions and 11 deletions
--- a/benchmark/cpp/benchmark_xpu.sh
+++ b/benchmark/cpp/benchmark_xpu.sh
@@ -18,12 +18,12 @@ fi
 ./benchmark_picodet --model picodet_l_640_coco_lcnet_no_nms --image 000000014439.jpg --config_path $CONFIG_PATH --no_nms

 # PaddleSeg
-./benchmark_ppseg --model Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
-./benchmark_ppseg --model PP_HumanSegV2_Lite_192x192_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
+./benchmark_ppseg --model Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH --xpu_l3_cache 0
+./benchmark_ppseg --model PP_HumanSegV2_Lite_192x192_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH  --xpu_l3_cache 0
 ./benchmark_ppseg --model PP_HumanSegV1_Lite_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
-./benchmark_ppseg --model Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH
-./benchmark_ppseg --model PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH
-./benchmark_ppseg --model SegFormer_B0-cityscapes-with-argmax --image cityscapes_demo.png --config_path $CONFIG_PATH
+./benchmark_ppseg --model Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH --xpu_l3_cache 0
+./benchmark_ppseg --model PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH --xpu_l3_cache 0
+./benchmark_ppseg --model SegFormer_B0-cityscapes-with-argmax --image cityscapes_demo.png --config_path $CONFIG_PATH --xpu_l3_cache 0
 ./benchmark_ppmatting --model PP-Matting-512 --image matting_input.jpg --config_path $CONFIG_PATH
 ./benchmark_ppmatting --model PPHumanMatting --image matting_input.jpg --config_path $CONFIG_PATH
 ./benchmark_ppmatting --model PPModnet_MobileNetV2 --image matting_input.jpg --config_path $CONFIG_PATH
--- a/benchmark/cpp/config/config.xpu.lite.fp32.l3.e2e.txt
+++ b/benchmark/cpp/config/config.xpu.lite.fp32.l3.e2e.txt
@@ -0,0 +1,14 @@
+device: xpu
+device_id: 0
+cpu_thread_nums: 1
+warmup: 20
+repeat: 100
+backend: lite
+profile_mode: end2end
+include_h2d_d2h: false
+use_fp16: false
+collect_memory_info: false
+sampling_interval: 1
+precision_compare: false
+xpu_l3_cache: 62914560
+result_path: benchmark_xpu_lite_fp32_l3_e2e.txt
--- a/benchmark/cpp/config/config.xpu.lite.fp32.l3.txt
+++ b/benchmark/cpp/config/config.xpu.lite.fp32.l3.txt
@@ -0,0 +1,14 @@
+device: xpu
+device_id: 0
+cpu_thread_nums: 1
+warmup: 20
+repeat: 100
+backend: lite
+profile_mode: runtime
+include_h2d_d2h: false
+use_fp16: false
+collect_memory_info: false
+sampling_interval: 1
+precision_compare: false
+xpu_l3_cache: 62914560
+result_path: benchmark_xpu_lite_fp32_l3.txt
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -32,6 +32,7 @@ DEFINE_string(image, "", "Path of the image file.");
 DEFINE_string(config_path, "config.txt", "Path of benchmark config.");
 DEFINE_int32(warmup, -1, "Number of warmup for profiling.");
 DEFINE_int32(repeat, -1, "Number of repeats for profiling.");
+DEFINE_int32(xpu_l3_cache, -1, "Size xpu l3 cache for profiling.");

 static void PrintUsage() {
  std::cout << "Usage: infer_demo --model model_path --image img_path "
--- a/benchmark/cpp/get_models.sh
+++ b/benchmark/cpp/get_models.sh
@@ -1,43 +1,93 @@
 #!/bin/bash
-set -e
+set +e
 set +x

 download_fd_model_zxvf() {
-  local model="$1"
+  local model="$1"  # archive file name, e.g. xxx_model.tgz
+  local len=${#model}
+  local model_dir=${model:0:len-4}  # xxx_model: archive name minus its 4-char suffix
+  if [ -d "${model_dir}" ]; then
+     echo "[INFO] --- $model_dir already exists!"
+     return
+  fi
  if [ ! -f "${model}" ]; then
     echo "[INFO] --- downloading $model"
     wget https://bj.bcebos.com/paddlehub/fastdeploy/$model && tar -zxvf $model
+     # remove stray "._*" files (likely macOS AppleDouble metadata) unpacked by tar; -f: no error if none exist
+     rm -f "./${model_dir}"/._*
  else
     echo "[INFO] --- $model already exists!"
+     if [ ! -d "${model_dir}" ]; then
+        tar -zxvf $model
+        rm -f "./${model_dir}"/._*
+     else
+        echo "[INFO] --- $model_dir already exists!"
+     fi
  fi
 }
 download_fd_model_xvf() {
  local model="$1"
+  local model_dir=${model:0:${#model}-4}  # xxx_model: archive name minus its 4-char suffix
+  if [ -d "${model_dir}" ]; then
+     echo "[INFO] --- $model_dir already exists!"
+     return
+  fi
  if [ ! -f "${model}" ]; then
     echo "[INFO] --- downloading $model"
     wget https://bj.bcebos.com/paddlehub/fastdeploy/$model && tar -xvf $model
+     rm -f "./${model_dir}"/._*  # drop stray "._*" files left by tar; -f: no error if none exist
  else
     echo "[INFO] --- $model already exists!"
+     if [ ! -d "${model_dir}" ]; then
+        tar -xvf $model
+        rm -f "./${model_dir}"/._*
+     else
+        echo "[INFO] --- $model_dir already exists!"
+     fi
  fi
 }
 download_common_model_zxvf() {
  local model_url="$1"
  local model="$2"
+  local model_dir=${model:0:${#model}-4}  # xxx_model: archive name minus its 4-char suffix
+  if [ -d "${model_dir}" ]; then
+     echo "[INFO] --- $model_dir already exists!"
+     return
+  fi
  if [ ! -f "${model}" ]; then
     echo "[INFO] --- downloading $model"
     wget ${model_url} && tar -zxvf $model
+     rm -f "./${model_dir}"/._*  # drop stray "._*" files left by tar; -f: no error if none exist
  else
     echo "[INFO] --- $model already exists!"
+     if [ ! -d "${model_dir}" ]; then
+        tar -zxvf $model
+        rm -f "./${model_dir}"/._*
+     else
+        echo "[INFO] --- $model_dir already exists!"
+     fi
  fi
 }
 download_common_model_xvf() {
  local model_url="$1"
  local model="$2"
+  local model_dir=${model:0:${#model}-4}  # xxx_model: archive name minus its 4-char suffix
+  if [ -d "${model_dir}" ]; then
+     echo "[INFO] --- $model_dir already exists!"
+     return
+  fi
  if [ ! -f "${model}" ]; then
     echo "[INFO] --- downloading $model"
     wget ${model_url} && tar -xvf $model
+     rm -f "./${model_dir}"/._*  # drop stray "._*" files left by tar; -f: no error if none exist
  else
     echo "[INFO] --- $model already exists!"
+     if [ ! -d "${model_dir}" ]; then
+        tar -xvf $model
+        rm -f "./${model_dir}"/._*
+     else
+        echo "[INFO] --- $model_dir already exists!"
+     fi
  fi
 }
 download_common_file() {
@@ -60,6 +110,7 @@ download_fd_model_zxvf yolov5_s_300e_coco_no_nms.tgz
 download_fd_model_zxvf yolov6_s_300e_coco_no_nms.tgz
 download_fd_model_zxvf yolov7_l_300e_coco_no_nms.tgz
 download_fd_model_zxvf yolov8_s_500e_coco_no_nms.tgz
+
 # PaddleClas
 download_fd_model_zxvf PPLCNet_x1_0_infer.tgz
 download_fd_model_zxvf PPLCNetV2_base_infer.tgz
@@ -90,6 +141,7 @@ download_fd_model_zxvf DenseNet121_infer.tgz
 download_fd_model_zxvf PPHGNet_small_infer.tgz
 download_fd_model_zxvf person_exists_infer.tgz
 download_fd_model_zxvf EfficientNetB0_small_infer.tgz
+
 # PaddleSeg
 download_fd_model_zxvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
 download_fd_model_zxvf PP_HumanSegV1_Lite_infer.tgz
@@ -100,12 +152,14 @@ download_fd_model_zxvf SegFormer_B0-cityscapes-with-argmax.tgz
 download_fd_model_xvf PP-Matting-512.tgz
 download_fd_model_xvf PPHumanMatting.tgz
 download_fd_model_xvf PPModnet_MobileNetV2.tgz
+
 # PaddleOCR
 download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar ch_PP-OCRv3_det_infer.tar
 download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar ch_PP-OCRv3_rec_infer.tar
 download_common_model_xvf https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar ch_ppocr_mobile_v2.0_cls_infer.tar
 download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar ch_PP-OCRv2_det_infer.tar
 download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar ch_PP-OCRv2_rec_infer.tar
+
 # download images
 download_common_file https://bj.bcebos.com/paddlehub/fastdeploy/rec_img.jpg rec_img.jpg
 download_common_file https://bj.bcebos.com/paddlehub/fastdeploy/cityscapes_demo.png cityscapes_demo.png
--- a/benchmark/cpp/option.h
+++ b/benchmark/cpp/option.h
@@ -85,8 +85,13 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
      return false;
    }
  } else if (config_info["device"] == "xpu") {
+    if (FLAGS_xpu_l3_cache >= 0) {  // --xpu_l3_cache given: overrides the config file value
+      option->UseKunlunXin(std::stoi(config_info["device_id"]),
+                           FLAGS_xpu_l3_cache);
+    } else {
      option->UseKunlunXin(std::stoi(config_info["device_id"]),
                           std::stoi(config_info["xpu_l3_cache"]));
+    }
    if (config_info["backend"] == "ort") {
      option->UseOrtBackend();
    } else if (config_info["backend"] == "paddle") {
--- a/cmake/kunlunxin.cmake
+++ b/cmake/kunlunxin.cmake
@@ -10,8 +10,8 @@ endif()
 option(WITH_LITE_XPU_LOG "" ON)
 if(NOT PADDLELITE_URL)
  if (WITH_LITE_XPU_LOG)
-    set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
-    # set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20230303.tgz")
+    #set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
+    set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20230303.tgz")
  else()
    set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-without-log-20230303.tgz")
  endif()
--- a/fastdeploy/runtime/backends/lite/configure_hardware.cc
+++ b/fastdeploy/runtime/backends/lite/configure_hardware.cc
@@ -66,6 +66,7 @@ void LiteBackend::ConfigureKunlunXin(const LiteBackendOption& option) {
      option.kunlunxin_l3_workspace_size);
  config_.set_xpu_l3_cache_method(option.kunlunxin_l3_workspace_size,
                                  option.kunlunxin_locked);
+  config_.set_xpu_l3_cache_autotune(option.kunlunxin_autotune);
  config_.set_xpu_conv_autotune(option.kunlunxin_autotune,
                                option.kunlunxin_autotune_file);
  config_.set_xpu_multi_encoder_method(option.kunlunxin_precision,