Merge branch 'develop' of https://github.com/PaddlePaddle/FastDeploy into new_dev

2025-10-19 06:54:41 +08:00 · 2023-03-21 02:21:35 +00:00
parent f23c61328e f278ed424b
commit 3cc72765dd
53 changed files with 115 additions and 64 deletions
--- a/benchmark/cpp/benchmark_gpu.sh
+++ b/benchmark/cpp/benchmark_gpu.sh
@@ -21,12 +21,18 @@ fi
 ./benchmark_ppseg --model Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
 ./benchmark_ppseg --model PP_HumanSegV2_Lite_192x192_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
 ./benchmark_ppseg --model PP_HumanSegV1_Lite_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
+./benchmark_ppseg --model PP_HumanSegV2_Mobile_192x192_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
 ./benchmark_ppseg --model Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH
 ./benchmark_ppseg --model PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH
 ./benchmark_ppseg --model SegFormer_B0-cityscapes-with-argmax --image cityscapes_demo.png --config_path $CONFIG_PATH
 ./benchmark_ppmatting --model PP-Matting-512 --image matting_input.jpg --config_path $CONFIG_PATH
 ./benchmark_ppmatting --model PPHumanMatting --image matting_input.jpg --config_path $CONFIG_PATH
 ./benchmark_ppmatting --model PPModnet_MobileNetV2 --image matting_input.jpg --config_path $CONFIG_PATH
+./benchmark_ppseg --model Unet_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH
+./benchmark_ppseg --model PP_HumanSegV1_Server_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
+./benchmark_ppseg --model FCN_HRNet_W18_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH
+
+

 # PaddleClas
 ./benchmark_ppcls --model PPLCNet_x1_0_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
--- a/benchmark/cpp/benchmark_gpu_trt.sh
+++ b/benchmark/cpp/benchmark_gpu_trt.sh
@@ -21,27 +21,52 @@ fi
 ./benchmark_ppseg --model Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH --trt_shape 1,3,144,256:1,3,144,256:1,3,144,256
 ./benchmark_ppseg --model PP_HumanSegV2_Lite_192x192_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH --trt_shape 1,3,192,192:1,3,192,192:1,3,192,192
 ./benchmark_ppseg --model PP_HumanSegV1_Lite_infer --image portrait_heng.jpg --config_path $CONFIG_PATH --trt_shape 1,3,192,192:1,3,192,192:1,3,192,192
+./benchmark_ppseg --model PP_HumanSegV2_Mobile_192x192_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH --trt_shape 1,3,192,192:1,3,192,192:1,3,192,192
 ./benchmark_ppseg --model Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH --trt_shape 1,3,512,512:1,3,512,512:1,3,512,512
 ./benchmark_ppseg --model PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH --trt_shape 1,3,512,512:1,3,512,512:1,3,512,512
 ./benchmark_ppseg --model SegFormer_B0-cityscapes-with-argmax --image cityscapes_demo.png --config_path $CONFIG_PATH --trt_shape 1,3,512,512:1,3,512,512:1,3,512,512
 ./benchmark_ppmatting --model PP-Matting-512 --image matting_input.jpg --config_path $CONFIG_PATH --trt_shape 1,3,512,512:1,3,512,512:1,3,512,512
 ./benchmark_ppmatting --model PPHumanMatting --image matting_input.jpg --config_path $CONFIG_PATH --trt_shape 1,3,2048,2048:1,3,2048,2048:1,3,2048,2048
 ./benchmark_ppmatting --model PPModnet_MobileNetV2 --image matting_input.jpg --config_path $CONFIG_PATH --trt_shape 1,3,512,512:1,3,512,512:1,3,512,512
+./benchmark_ppseg --model Unet_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH --trt_shape 1,3,512,512:1,3,512,512:1,3,512,512
+./benchmark_ppseg --model PP_HumanSegV1_Server_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH --trt_shape 1,3,512,512:1,3,512,512:1,3,512,512
+./benchmark_ppseg --model FCN_HRNet_W18_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH --trt_shape 1,3,512,512:1,3,512,512:1,3,512,512
+

 # PaddleClas
 ./benchmark_ppcls --model PPLCNet_x1_0_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
 ./benchmark_ppcls --model PPLCNetV2_base_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model EfficientNetB7_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model EfficientNetB0_small_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model GhostNet_x0_5_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model GhostNet_x1_3_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model GhostNet_x1_3_ssld_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
 ./benchmark_ppcls --model MobileNetV1_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
 ./benchmark_ppcls --model MobileNetV1_ssld_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model MobileNetV2_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model MobileNetV2_ssld_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model MobileNetV3_small_x0_35_ssld_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
 ./benchmark_ppcls --model MobileNetV3_large_x1_0_ssld_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model ShuffleNetV2_x0_25_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
 ./benchmark_ppcls --model ShuffleNetV2_x2_0_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model SqueezeNet1_1_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model InceptionV3_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
 ./benchmark_ppcls --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
-./benchmark_ppcls --model EfficientNetB0_small_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
 ./benchmark_ppcls --model PPHGNet_tiny_ssld_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model PPHGNet_base_ssld_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model ResNet50_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model EfficientNetB0_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model MobileNetV2_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model MobileNetV3_small_x1_0_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model ViT_large_patch16_224_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model ResNeXt50_32x4d_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model DenseNet121_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model PPHGNet_small_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
+./benchmark_ppcls --model person_exists_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH

 # PaddleOCR
 ./benchmark_ppocr_det --model ch_PP-OCRv3_det_infer --image 12.jpg --config_path $CONFIG_PATH --trt_shape 1,3,64,64:1,3,640,640:1,3,960,960
 ./benchmark_ppocr_cls --model ch_ppocr_mobile_v2.0_cls_infer --image rec_img.jpg --config_path $CONFIG_PATH --trt_shape 1,3,48,10:4,3,48,320:8,3,48,1024
 ./benchmark_ppocr_rec --model ch_PP-OCRv3_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH --trt_shape 1,3,48,10:4,3,48,320:8,3,48,2304
 ./benchmark_ppocr_det --model ch_PP-OCRv2_det_infer --image 12.jpg --config_path $CONFIG_PATH --trt_shape 1,3,64,64:1,3,640,640:1,3,960,960
-./benchmark_ppocr_rec --model ch_PP-OCRv2_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH --trt_shape 1,3,48,10:4,3,48,320:8,3,48,2304
+./benchmark_ppocr_rec --model ch_PP-OCRv2_rec_infer --image rec_img.jpg --rec_label_file ppocr_keys_v1.txt --config_path $CONFIG_PATH --trt_shape 1,3,32,10:4,3,32,320:8,3,32,2304
--- a/benchmark/cpp/benchmark_picodet.cc
+++ b/benchmark/cpp/benchmark_picodet.cc
--- a/benchmark/cpp/benchmark_ppmatting.cc
+++ b/benchmark/cpp/benchmark_ppmatting.cc
--- a/benchmark/cpp/benchmark_ppocr_cls.cc
+++ b/benchmark/cpp/benchmark_ppocr_cls.cc
--- a/benchmark/cpp/benchmark_ppocr_det.cc
+++ b/benchmark/cpp/benchmark_ppocr_det.cc
--- a/benchmark/cpp/benchmark_ppocr_rec.cc
+++ b/benchmark/cpp/benchmark_ppocr_rec.cc
@@ -55,6 +55,11 @@ int main(int argc, char* argv[]) {
  }
  auto model_ppocr_rec = vision::ocr::Recognizer(
      model_file, params_file, FLAGS_rec_label_file, option, model_format);
+  std::vector<std::string> model_names;
+  fastdeploy::benchmark::Split(FLAGS_model, model_names, sep);
+  if (model_names[model_names.size() - 1] == "ch_PP-OCRv2_rec_infer") {
+    model_ppocr_rec.GetPreprocessor().SetRecImageShape({3, 32, 320});
+  }
  std::string text;
  float rec_score;
  if (config_info["precision_compare"] == "true") {
--- a/benchmark/cpp/benchmark_ppseg.cc
+++ b/benchmark/cpp/benchmark_ppseg.cc
--- a/benchmark/cpp/benchmark_ppyoloe.cc
+++ b/benchmark/cpp/benchmark_ppyoloe.cc
--- a/benchmark/cpp/benchmark_ppyolov5.cc
+++ b/benchmark/cpp/benchmark_ppyolov5.cc
--- a/benchmark/cpp/benchmark_ppyolov6.cc
+++ b/benchmark/cpp/benchmark_ppyolov6.cc
--- a/benchmark/cpp/benchmark_ppyolov7.cc
+++ b/benchmark/cpp/benchmark_ppyolov7.cc
--- a/benchmark/cpp/benchmark_ppyolov8.cc
+++ b/benchmark/cpp/benchmark_ppyolov8.cc
--- a/benchmark/cpp/benchmark_ppyolox.cc
+++ b/benchmark/cpp/benchmark_ppyolox.cc
--- a/benchmark/cpp/benchmark_x86.sh
+++ b/benchmark/cpp/benchmark_x86.sh
@@ -21,12 +21,17 @@ fi
 ./benchmark_ppseg --model Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
 ./benchmark_ppseg --model PP_HumanSegV2_Lite_192x192_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
 ./benchmark_ppseg --model PP_HumanSegV1_Lite_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
+./benchmark_ppseg --model PP_HumanSegV2_Mobile_192x192_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
 ./benchmark_ppseg --model Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH
 ./benchmark_ppseg --model PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH
 ./benchmark_ppseg --model SegFormer_B0-cityscapes-with-argmax --image cityscapes_demo.png --config_path $CONFIG_PATH
 ./benchmark_ppmatting --model PP-Matting-512 --image matting_input.jpg --config_path $CONFIG_PATH
 ./benchmark_ppmatting --model PPHumanMatting --image matting_input.jpg --config_path $CONFIG_PATH
 ./benchmark_ppmatting --model PPModnet_MobileNetV2 --image matting_input.jpg --config_path $CONFIG_PATH
+./benchmark_ppseg --model Unet_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH
+./benchmark_ppseg --model PP_HumanSegV1_Server_with_argmax_infer --image portrait_heng.jpg --config_path $CONFIG_PATH
+./benchmark_ppseg --model FCN_HRNet_W18_cityscapes_with_argmax_infer --image cityscapes_demo.png --config_path $CONFIG_PATH
+

 # PaddleClas
 ./benchmark_ppcls --model PPLCNet_x1_0_infer --image ILSVRC2012_val_00000010.jpeg --config_path $CONFIG_PATH
--- a/benchmark/cpp/config/config.gpu.ort.fp32.e2e.mem.txt
+++ b/benchmark/cpp/config/config.gpu.ort.fp32.e2e.mem.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 10
-repeat: 50
+warmup: 20
+repeat: 100
 backend: ort
 profile_mode: end2end
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.paddle.fp32.e2e.mem.txt
+++ b/benchmark/cpp/config/config.gpu.paddle.fp32.e2e.mem.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 10
-repeat: 50
+warmup: 20
+repeat: 100
 backend: paddle
 profile_mode: end2end
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.paddle_trt.fp16.e2e.mem.txt
+++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp16.e2e.mem.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 10
-repeat: 50
+warmup: 20
+repeat: 100
 backend: paddle_trt
 profile_mode: end2end
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.paddle_trt.fp16.e2e.txt
+++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp16.e2e.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 20
-repeat: 100
+warmup: 200
+repeat: 1000
 backend: paddle_trt
 profile_mode: end2end
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.paddle_trt.fp16.txt
+++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp16.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 20
-repeat: 100
+warmup: 200
+repeat: 1000
 backend: paddle_trt
 profile_mode: runtime
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.paddle_trt.fp32.e2e.mem.txt
+++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp32.e2e.mem.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 10
-repeat: 50
+warmup: 20
+repeat: 100
 backend: paddle_trt
 profile_mode: end2end
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.paddle_trt.fp32.e2e.txt
+++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp32.e2e.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 20
-repeat: 100
+warmup: 200
+repeat: 1000
 backend: paddle_trt
 profile_mode: end2end
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.paddle_trt.fp32.txt
+++ b/benchmark/cpp/config/config.gpu.paddle_trt.fp32.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 20
-repeat: 100
+warmup: 200
+repeat: 1000
 backend: paddle_trt
 profile_mode: runtime
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.trt.fp16.e2e.mem.txt
+++ b/benchmark/cpp/config/config.gpu.trt.fp16.e2e.mem.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 10
-repeat: 50
+warmup: 20
+repeat: 100
 backend: trt
 profile_mode: end2end
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.trt.fp16.e2e.txt
+++ b/benchmark/cpp/config/config.gpu.trt.fp16.e2e.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 20
-repeat: 100
+warmup: 200
+repeat: 1000
 backend: trt
 profile_mode: end2end
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.trt.fp16.txt
+++ b/benchmark/cpp/config/config.gpu.trt.fp16.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 20
-repeat: 100
+warmup: 200
+repeat: 1000
 backend: trt
 profile_mode: runtime
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.trt.fp32.e2e.mem.txt
+++ b/benchmark/cpp/config/config.gpu.trt.fp32.e2e.mem.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 10
-repeat: 50
+warmup: 20
+repeat: 100
 backend: trt
 profile_mode: end2end
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.trt.fp32.e2e.txt
+++ b/benchmark/cpp/config/config.gpu.trt.fp32.e2e.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 20
-repeat: 100
+warmup: 200
+repeat: 1000
 backend: trt
 profile_mode: end2end
 include_h2d_d2h: false
--- a/benchmark/cpp/config/config.gpu.trt.fp32.txt
+++ b/benchmark/cpp/config/config.gpu.trt.fp32.txt
@@ -1,8 +1,8 @@
 device: gpu
 device_id: 3
 cpu_thread_nums: 1
-warmup: 20
-repeat: 100
+warmup: 200
+repeat: 1000
 backend: trt
 profile_mode: runtime
 include_h2d_d2h: false
--- a/benchmark/cpp/get_models.sh
+++ b/benchmark/cpp/get_models.sh
@@ -152,6 +152,11 @@ download_fd_model_zxvf SegFormer_B0-cityscapes-with-argmax.tgz
 download_fd_model_xvf PP-Matting-512.tgz
 download_fd_model_xvf PPHumanMatting.tgz
 download_fd_model_xvf PPModnet_MobileNetV2.tgz
+download_fd_model_xvf Unet_cityscapes_with_argmax_infer.tgz
+download_fd_model_xvf PP_HumanSegV1_Server_with_argmax_infer.tgz
+download_fd_model_xvf FCN_HRNet_W18_cityscapes_with_argmax_infer.tgz
+download_fd_model_xvf PP_HumanSegV2_Mobile_192x192_with_argmax_infer.tgz
+

 # PaddleOCR
 download_common_model_xvf https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar ch_PP-OCRv3_det_infer.tar
--- a/benchmark/cpp/option.h
+++ b/benchmark/cpp/option.h
@@ -43,6 +43,8 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option,
      option->UsePaddleInferBackend();
    } else if (config_info["backend"] == "trt" ||
               config_info["backend"] == "paddle_trt") {
+      option->trt_option.serialize_file = FLAGS_model +
+                                          sep + "trt_serialized.trt";
      option->UseTrtBackend();
      if (config_info["backend"] == "paddle_trt") {
        option->UsePaddleInferBackend();
--- a/benchmark/python/README.md
+++ b/benchmark/python/README.md
--- a/benchmark/python/benchmark_ernie_seq_cls.py
+++ b/benchmark/python/benchmark_ernie_seq_cls.py
--- a/benchmark/python/convert_info.py
+++ b/benchmark/python/convert_info.py
--- a/benchmark/python/requirements.txt
+++ b/benchmark/python/requirements.txt
--- a/benchmark/python/run_benchmark_ernie_seq_cls.sh
+++ b/benchmark/python/run_benchmark_ernie_seq_cls.sh
--- a/benchmark/python/run_benchmark_ppcls.sh
+++ b/benchmark/python/run_benchmark_ppcls.sh
--- a/benchmark/python/run_benchmark_ppdet.sh
+++ b/benchmark/python/run_benchmark_ppdet.sh
--- a/benchmark/python/run_benchmark_ppocr.sh
+++ b/benchmark/python/run_benchmark_ppocr.sh
--- a/benchmark/python/run_benchmark_ppseg.sh
+++ b/benchmark/python/run_benchmark_ppseg.sh
--- a/benchmark/python/run_benchmark_uie.sh
+++ b/benchmark/python/run_benchmark_uie.sh
--- a/examples/vision/segmentation/paddleseg/README.md
+++ b/examples/vision/segmentation/paddleseg/README.md
@@ -30,7 +30,7 @@
 |ARM CPU|✅|[链接](semantic_segmentation/cpu-gpu)|✅|✅|
 |Intel GPU(集成显卡)|✅|[链接](semantic_segmentation/cpu-gpu)|✅|✅|  
 |Intel GPU(独立显卡)|✅|[链接](semantic_segmentation/cpu-gpu)|✅|✅|  
-|昆仑|✅|[链接](semantic_segmentation/kunlun)|✅|✅|
+|昆仑|✅|[链接](semantic_segmentation/kunlunxin)|✅|✅|
 |昇腾|✅|[链接](semantic_segmentation/ascend)|✅|✅|
 |瑞芯微|✅|[链接](semantic_segmentation/rockchip)|✅|✅|  
 |晶晨|✅|[链接](semantic_segmentation/amlogic)|--|✅|✅|  
@@ -58,9 +58,9 @@
  - [Python部署示例](semantic_segmentation/cpu-gpu/python/)
  - [C++部署示例](semantic_segmentation/cpu-gpu/cpp/)
 - 昆仑 XPU
-  - [部署模型准备](semantic_segmentation/kunlun)  
-  - [Python部署示例](semantic_segmentation/kunlun/python/)
-  - [C++部署示例](semantic_segmentation/kunlun/cpp/)
+  - [部署模型准备](semantic_segmentation/kunlunxin)  
+  - [Python部署示例](semantic_segmentation/kunlunxin/python/)
+  - [C++部署示例](semantic_segmentation/kunlunxin/cpp/)
 - 昇腾 Ascend
  - [部署模型准备](semantic_segmentation/ascend)  
  - [Python部署示例](semantic_segmentation/ascend/python/)
@@ -97,7 +97,7 @@
 |ARM CPU|✅|[链接](matting/cpu-gpu)|✅|✅|  
 |Intel GPU(集成显卡)|✅|[链接](matting/cpu-gpu)|✅|✅|  
 |Intel GPU(独立显卡)|✅|[链接](matting/cpu-gpu)|✅|✅|  
-|昆仑|✅|[链接](matting/kunlun)|✅|✅|  
+|昆仑|✅|[链接](matting/kunlunxin)|✅|✅|  
 |昇腾|✅|[链接](matting/ascend)|✅|✅|  

 ### 3.2 详细使用文档
@@ -122,9 +122,9 @@
  - [Python部署示例](matting/cpu-gpu/python/)
  - [C++部署示例](cpu-gpu/cpp/)
 - 昆仑 XPU
-  - [部署模型准备](matting/kunlun)  
-  - [Python部署示例](matting/kunlun/README.md)
-  - [C++部署示例](matting/kunlun/README.md)
+  - [部署模型准备](matting/kunlunxin)  
+  - [Python部署示例](matting/kunlunxin/README.md)
+  - [C++部署示例](matting/kunlunxin/README.md)
 - 昇腾 Ascend
  - [部署模型准备](matting/ascend)  
  - [Python部署示例](matting/ascend/README.md)
--- a/examples/vision/segmentation/paddleseg/matting/README.md
+++ b/examples/vision/segmentation/paddleseg/matting/README.md
@@ -13,7 +13,7 @@
 |ARM CPU|✅|[链接](cpu-gpu)|✅|✅|  
 |Intel GPU(集成显卡)|✅|[链接](cpu-gpu)|✅|✅|  
 |Intel GPU(独立显卡)|✅|[链接](cpu-gpu)|✅|✅|  
-|昆仑|✅|[链接](kunlun)|✅|✅|  
+|昆仑|✅|[链接](kunlunxin)|✅|✅|  
 |昇腾|✅|[链接](ascend)|✅|✅|  

 ## 3. 详细使用文档
@@ -38,9 +38,9 @@
  - [Python部署示例](cpu-gpu/python/)
  - [C++部署示例](cpu-gpu/cpp/)
 - 昆仑 XPU
-  - [部署模型准备](kunlun)  
-  - [Python部署示例](kunlun/README.md)
-  - [C++部署示例](kunlun/README.md)
+  - [部署模型准备](kunlunxin)  
+  - [Python部署示例](kunlunxin/README.md)
+  - [C++部署示例](kunlunxin/README.md)
 - 昇腾 Ascend
  - [部署模型准备](ascend)  
  - [Python部署示例](ascend/README.md)
--- a/examples/vision/segmentation/paddleseg/matting/kunlunxin/README.md
+++ b/examples/vision/segmentation/paddleseg/matting/kunlunxin/README.md
--- a/examples/vision/segmentation/paddleseg/semantic_segmentation/README.md
+++ b/examples/vision/segmentation/paddleseg/semantic_segmentation/README.md
@@ -13,7 +13,7 @@
 |ARM CPU|✅|[链接](cpu-gpu)|✅|✅|
 |Intel GPU(集成显卡)|✅|[链接](cpu-gpu)|✅|✅|  
 |Intel GPU(独立显卡)|✅|[链接](cpu-gpu)|✅|✅|  
-|昆仑|✅|[链接](kunlun)|✅|✅|
+|昆仑|✅|[链接](kunlunxin)|✅|✅|
 |昇腾|✅|[链接](ascend)|✅|✅|
 |瑞芯微|✅|[链接](rockchip)|✅|✅|  
 |晶晨|✅|[链接](amlogic)|--|✅|  
@@ -41,9 +41,9 @@
  - [Python部署示例](cpu-gpu/python/)
  - [C++部署示例](cpu-gpu/cpp/)
 - 昆仑 XPU
-  - [部署模型准备](kunlun)  
-  - [Python部署示例](kunlun/python/)
-  - [C++部署示例](kunlun/cpp/)
+  - [部署模型准备](kunlunxin)  
+  - [Python部署示例](kunlunxin/python/)
+  - [C++部署示例](kunlunxin/cpp/)
 - 昇腾 Ascend
  - [部署模型准备](ascend)  
  - [Python部署示例](ascend/python/)
--- a/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/README.md
+++ b/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/README.md
--- a/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/cpp/CMakeLists.txt
+++ b/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/cpp/CMakeLists.txt
--- a/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/cpp/README.md
+++ b/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/cpp/README.md
@@ -14,12 +14,12 @@
 ```bash
 # 下载部署示例代码
 git clone https://github.com/PaddlePaddle/FastDeploy.git
-cd  FastDeploy/examples/vision/segmentation/semantic_segmentation/kunlun/cpp
+cd  FastDeploy/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/cpp
 # 如果您希望从PaddleSeg下载示例代码，请运行
 # git clone https://github.com/PaddlePaddle/PaddleSeg.git
 # # 注意：如果当前分支找不到下面的fastdeploy测试代码，请切换到develop分支
 # # git checkout develop
-# cd PaddleSeg/deploy/fastdeploy/semantic_segmentation/kunlun/cpp
+# cd PaddleSeg/deploy/fastdeploy/semantic_segmentation/kunlunxin/cpp

 mkdir build
 cd build
--- a/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/cpp/infer.cc
+++ b/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/cpp/infer.cc
--- a/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/python/README.md
+++ b/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/python/README.md
@@ -14,12 +14,12 @@
 ```bash
 # 下载部署示例代码
 git clone https://github.com/PaddlePaddle/FastDeploy.git
-cd  FastDeploy/examples/vision/segmentation/semantic_segmentation/kunlun/python
+cd  FastDeploy/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/python
 # 如果您希望从PaddleSeg下载示例代码，请运行
 # git clone https://github.com/PaddlePaddle/PaddleSeg.git
 # # 注意：如果当前分支找不到下面的fastdeploy测试代码，请切换到develop分支
 # # git checkout develop
-# cd PaddleSeg/deploy/fastdeploy/semantic_segmentation/kunlun/python
+# cd PaddleSeg/deploy/fastdeploy/semantic_segmentation/kunlunxin/python

 # 下载PP-LiteSeg模型文件和测试图片
 wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz
--- a/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/python/infer.py
+++ b/examples/vision/segmentation/paddleseg/semantic_segmentation/kunlunxin/python/infer.py
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -113,7 +113,8 @@ bool PaddleBackend::Init(const RuntimeOption& runtime_option) {
  option.paddle_infer_option.external_stream_ = runtime_option.external_stream_;
  option.paddle_infer_option.trt_option = runtime_option.trt_option;
  option.paddle_infer_option.trt_option.gpu_id = runtime_option.device_id;
-  return InitFromPaddle(option.model_file, option.params_file, option.model_from_memory_, option.paddle_infer_option);
+  return InitFromPaddle(option.model_file, option.params_file,
+                        option.model_from_memory_, option.paddle_infer_option);
 }

 bool PaddleBackend::InitFromPaddle(const std::string& model,
@@ -126,8 +127,8 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
    return false;
  }
  if (model_from_memory) {
-    config_.SetModelBuffer(model.c_str(), model.size(),
-                           params.c_str(), params.size());
+    config_.SetModelBuffer(model.c_str(), model.size(), params.c_str(),
+                           params.size());
  } else {
    config_.SetModel(model, params);
  }
@@ -140,7 +141,8 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
  // PaddleReader instead now
  std::string model_content = model;
  if (!model_from_memory) {
-    FDASSERT(ReadBinaryFromFile(model, &model_content), "Failed to read file %s.", model.c_str());
+    FDASSERT(ReadBinaryFromFile(model, &model_content),
+             "Failed to read file %s.", model.c_str());
  }
  auto reader =
      paddle2onnx::PaddleReader(model_content.c_str(), model_content.size());
@@ -210,8 +212,7 @@ bool PaddleBackend::InitFromPaddle(const std::string& model,
      paddle_infer::Config analysis_config;
      if (model_from_memory) {
        analysis_config.SetModelBuffer(model.c_str(), model.size(),
-                                       params.c_str(),
-                                       params.size());
+                                       params.c_str(), params.size());
      } else {
        analysis_config.SetModel(model, params);
      }
@@ -283,7 +284,6 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
    auto handle = predictor_->GetInputHandle(inputs[i].name);
    ShareTensorFromFDTensor(handle.get(), inputs[i]);
  }
-  std::unordered_set<std::string> prebinded_output_name;
  // prebinded output only support for GPU
  if (!copy_to_fd) {
    for (size_t i = 0; i < (*outputs).size(); ++i) {
@@ -297,7 +297,6 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
      // Record the prebinded output_name.
      // Those outputs do not need PaddleTensorToFDTensor
      // after predictor_.Run()
-      prebinded_output_name.insert(output_name);
      auto handle = predictor_->GetOutputHandle(output_name);
      ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]);
    }
@@ -309,11 +308,6 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,

  outputs->resize(outputs_desc_.size());
  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
-    // skip prebinded output
-    if (copy_to_fd == false &&
-        prebinded_output_name.count(outputs_desc_[i].name)) {
-      continue;
-    }
    auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
    if (copy_to_fd) {
      (*outputs)[i].is_pinned_memory = option_.enable_pinned_memory;
@@ -334,7 +328,10 @@ std::unique_ptr<BaseBackend> PaddleBackend::Clone(RuntimeOption& runtime_option,
    auto clone_option = option_;
    clone_option.device_id = device_id;
    clone_option.external_stream_ = stream;
-    FDASSERT(casted_backend->InitFromPaddle(runtime_option.model_file, runtime_option.params_file, runtime_option.model_from_memory_, clone_option), "Clone model from Paddle failed while initialize PaddleBackend.");
+    FDASSERT(casted_backend->InitFromPaddle(
+                 runtime_option.model_file, runtime_option.params_file,
+                 runtime_option.model_from_memory_, clone_option),
+             "Clone model from Paddle failed while initialize PaddleBackend.");
    FDWARNING << "The target device id:" << device_id
              << " is different from current device id:" << option_.device_id
              << ", cannot share memory with current engine." << std::endl;
--- a/serving/src/fastdeploy_runtime.cc
+++ b/serving/src/fastdeploy_runtime.cc
@@ -345,6 +345,12 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
                    ParseBoolValue(value_string, &enable_fixed_size_opt));
                runtime_options_->paddle_infer_option.enable_fixed_size_opt =
                    enable_fixed_size_opt;
+              } else if (param_key == "collect_trt_shape") {
+                bool collect_trt_shape = false;
+                THROW_IF_BACKEND_MODEL_ERROR(
+                    ParseBoolValue(value_string, &collect_trt_shape));
+                runtime_options_->paddle_infer_option.collect_trt_shape =
+                    collect_trt_shape;
              }
            }
          }