Merge branch 'develop' into add_batch_size_for_uie

This commit is contained in:
Jack Zhou
2022-12-28 10:51:40 +08:00
committed by GitHub
253 changed files with 6,554 additions and 2,573 deletions

View File

@@ -66,7 +66,7 @@ option(ENABLE_TEXT "Whether to enable text models usage." OFF)
option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF)
option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF)
option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF)
option(WITH_XPU "Whether to compile for KunlunXin XPU deploy." OFF)
option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF)
option(WITH_TESTING "Whether to compile with unittest." OFF)
############################# Options for Android cross compiling #########################
option(WITH_OPENCV_STATIC "Use OpenCV static lib for Android." OFF)
@@ -148,12 +148,12 @@ if (WITH_ASCEND)
include(${PROJECT_SOURCE_DIR}/cmake/ascend.cmake)
endif()
if (WITH_XPU)
if (WITH_KUNLUNXIN)
if(NOT ENABLE_LITE_BACKEND)
set(ENABLE_LITE_BACKEND ON)
endif()
if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
message(FATAL_ERROR "XPU is only supported on Linux x64 platform")
message(FATAL_ERROR "KunlunXin XPU is only supported on Linux x64 platform")
endif()
if(NOT PADDLELITE_URL)
set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")

View File

@@ -27,7 +27,7 @@ set(OPENCV_DIRECTORY "@OPENCV_DIRECTORY@")
set(ORT_DIRECTORY "@ORT_DIRECTORY@")
set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
set(WITH_XPU @WITH_XPU@)
set(WITH_KUNLUNXIN @WITH_KUNLUNXIN@)
set(FASTDEPLOY_LIBS "")
set(FASTDEPLOY_INCS "")
@@ -246,7 +246,7 @@ if(ENABLE_PADDLE_FRONTEND)
list(APPEND FASTDEPLOY_LIBS ${PADDLE2ONNX_LIB})
endif()
if(WITH_XPU)
if(WITH_KUNLUNXIN)
list(APPEND FASTDEPLOY_LIBS -lpthread -lrt -ldl)
endif()

View File

@@ -84,6 +84,8 @@ else()
else()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
set(FLYCV_FILE "flycv-linux-aarch64-${FLYCV_VERSION}.tgz")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
set(FLYCV_FILE "flycv-linux-armhf-${FLYCV_VERSION}.tgz")
else()
set(FLYCV_FILE "flycv-linux-x64-${FLYCV_VERSION}.tgz")
endif()

View File

@@ -10,12 +10,6 @@ download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE}
# set path
set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
else ()
message(FATAL_ERROR "[rknpu2.cmake] Only support build rknpu2 in Linux")
endif ()
if (EXISTS ${RKNPU_RUNTIME_PATH})
set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so)
include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include)

View File

@@ -39,7 +39,7 @@ function(fastdeploy_summary)
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
message(STATUS " WITH_ASCEND : ${WITH_ASCEND}")
message(STATUS " WITH_TIMVX : ${WITH_TIMVX}")
message(STATUS " WITH_XPU : ${WITH_XPU}")
message(STATUS " WITH_KUNLUNXIN : ${WITH_KUNLUNXIN}")
if(ENABLE_ORT_BACKEND)
message(STATUS " ONNXRuntime version : ${ONNXRUNTIME_VERSION}")
endif()

View File

@@ -8,7 +8,7 @@
- [Build and Install FastDeploy Library on GPU Platform](en/build_and_install/gpu.md)
- [Build and Install FastDeploy Library on CPU Platform](en/build_and_install/cpu.md)
- [Build and Install FastDeploy Library on IPU Platform](en/build_and_install/ipu.md)
- [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/xpu.md)
- [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/kunlunxin.md)
- [Build and Install on RV1126 Platform](en/build_and_install/rv1126.md)
- [Build and Install on RK3588 and RK356X Platform](en/build_and_install/rknpu2.md)
- [Build and Install on A311D Platform](en/build_and_install/a311d.md)

View File

@@ -8,7 +8,7 @@
- [Build and Install FastDeploy Library on GPU Platform](cn/build_and_install/gpu.md)
- [Build and Install FastDeploy Library on CPU Platform](cn/build_and_install/cpu.md)
- [Build and Install FastDeploy Library on IPU Platform](cn/build_and_install/ipu.md)
- [Build and Install FastDeploy Library on KunlunXin XPU Platform](cn/build_and_install/xpu.md)
- [Build and Install FastDeploy Library on KunlunXin XPU Platform](cn/build_and_install/kunlunxin.md)
- [Build and Install on RV1126 Platform](cn/build_and_install/rv1126.md)
- [Build and Install on RK3588 Platform](cn/build_and_install/rknpu2.md)
- [Build and Install on A311D Platform](cn/build_and_install/a311d.md)

View File

@@ -8,7 +8,7 @@
- [Build and Install FastDeploy Library on GPU Platform](en/build_and_install/gpu.md)
- [Build and Install FastDeploy Library on CPU Platform](en/build_and_install/cpu.md)
- [Build and Install FastDeploy Library on IPU Platform](en/build_and_install/ipu.md)
- [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/xpu.md)
- [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/kunlunxin.md)
- [Build and Install on RV1126 Platform](en/build_and_install/rv1126.md)
- [Build and Install on RK3588 Platform](en/build_and_install/rknpu2.md)
- [Build and Install on A311D Platform](en/build_and_install/a311d.md)

View File

@@ -14,7 +14,7 @@
- [RV1126 deployment environment](rv1126.md)
- [RK3588 deployment environment](rknpu2.md)
- [A311D deployment environment](a311d.md)
- [KunlunXin XPU deployment environment](xpu.md)
- [KunlunXin XPU deployment environment](kunlunxin.md)
- [Huawei Ascend deployment environment](huawei_ascend.md)
@@ -27,7 +27,7 @@
| ENABLE_LITE_BACKEND | Default OFF, whether to integrate the Paddle Lite backend (needs to be ON when building the Android library) |
| ENABLE_RKNPU2_BACKEND | Default OFF, whether to integrate the RKNPU2 backend (recommended ON for RK3588/RK3568/RK3566) |
| WITH_ASCEND | Default OFF, needs to be ON when deploying on Huawei Ascend NPU |
| WITH_XPU | Default OFF, needs to be ON when deploying on KunlunXin XPU |
| WITH_KUNLUNXIN | Default OFF, needs to be ON when deploying on KunlunXin XPU |
| WITH_TIMVX | Default OFF, needs to be ON when deploying on RV1126/RV1109/A311D |
| ENABLE_TRT_BACKEND | Default OFF, whether to integrate the TensorRT backend (recommended ON for GPU) |
| ENABLE_OPENVINO_BACKEND | Default OFF, whether to integrate the OpenVINO backend (recommended ON for CPU) |

View File

@@ -1,4 +1,4 @@
[English](../../en/build_and_install/xpu.md) | 简体中文
[English](../../en/build_and_install/kunlunxin.md) | 简体中文
# How to Build KunlunXin XPU Deployment Environment
@@ -10,7 +10,7 @@ FastDeploy 基于 Paddle Lite 后端支持在昆仑芯 XPU 上进行部署推理
The relevant compile options are described as follows:
|Compile Options|Default Values|Description|Remarks|
|:---|:---|:---|:---|
| WITH_XPU | OFF | Needs to be set to ON when deploying on XPU | - |
| WITH_KUNLUNXIN | OFF | Needs to be set to ON when deploying on KunlunXin XPU | - |
| ENABLE_ORT_BACKEND | OFF | Whether to integrate the ONNX Runtime backend | - |
| ENABLE_PADDLE_BACKEND | OFF | Whether to integrate the Paddle Inference backend | - |
| ENABLE_OPENVINO_BACKEND | OFF | Whether to integrate the OpenVINO backend | - |
@@ -41,11 +41,11 @@ cd FastDeploy
mkdir build && cd build
# CMake configuration with KunlunXin xpu toolchain
cmake -DWITH_XPU=ON \
cmake -DWITH_KUNLUNXIN=ON \
-DWITH_GPU=OFF \ # Do not build GPU support
-DENABLE_ORT_BACKEND=ON \ # Optionally enable the ORT backend
-DENABLE_PADDLE_BACKEND=ON \ # Optionally enable the Paddle backend
-DCMAKE_INSTALL_PREFIX=fastdeploy-xpu \
-DCMAKE_INSTALL_PREFIX=fastdeploy-kunlunxin \
-DENABLE_VISION=ON \ # Whether to build the vision deployment module (optional)
-DOPENCV_DIRECTORY=/usr/lib/x86_64-linux-gnu/cmake/opencv4 \
..
@@ -54,14 +54,14 @@ cmake -DWITH_XPU=ON \
make -j8
make install
```
After the compilation is complete, a fastdeploy-xpu directory is generated, indicating that the Paddle Lite based FastDeploy library has been built.
After the compilation is complete, a fastdeploy-kunlunxin directory is generated, indicating that the Paddle Lite based FastDeploy library has been built.
## Python Build
The build commands are as follows:
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
export WITH_XPU=ON
export WITH_KUNLUNXIN=ON
export WITH_GPU=OFF
export ENABLE_ORT_BACKEND=ON
export ENABLE_PADDLE_BACKEND=ON

View File

@@ -61,6 +61,7 @@ mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
-DWITH_TIMVX=ON \
-DTARGET_ABI=armhf \
-DENABLE_FLYCV=ON \ # Whether to enable FlyCV to optimize pre/post-processing (optional)
-DCMAKE_INSTALL_PREFIX=fastdeploy-timvx \
-DENABLE_VISION=ON \ # Whether to build the vision deployment module (optional)
-Wno-dev ..

View File

@@ -4,7 +4,10 @@
## Introduction
FastDeploy provides a simple integration of the ONNX-to-RKNN conversion process. This tutorial uses tools/export.py to export the model; a YAML configuration file must be written before exporting.
FastDeploy provides a simple integration of the ONNX-to-RKNN conversion process.
This tutorial uses tools/rknpu2/export.py to export the model; a YAML configuration file must be written before exporting.
## Environment Requirements
Before converting, please check that the environment has been installed successfully according to the [rknn_toolkit2 installation guide](./install_rknn_toolkit2.md).
@@ -14,29 +17,72 @@ Fastdeploy已经简单的集成了onnx->rknn的转换过程。本教程使用too
|-----------------|------------|--------------------|
| verbose | Yes, defaults to True | Whether to print detailed information during model conversion |
| config_path | No | Path of the configuration file |
| target_platform | No | Target chip model |
## About the config file
### config YAML file template
```yaml
model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
output_folder: ./
target_platform: RK3588
normalize:
  mean: [[0.5,0.5,0.5]]
  std: [[0.5,0.5,0.5]]
outputs: None
mean:
  -
    - 128.5
    - 128.5
    - 128.5
std:
  -
    - 128.5
    - 128.5
    - 128.5
model_path: "./scrfd_500m_bnkps_shape640x640.onnx"
outputs_nodes:
do_quantization: True
dataset: "./datasets.txt"
output_folder: "./"
```
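Before running the export script, you can sanity-check the configuration with a small script. This is only a convenience sketch, not part of the tool itself; it assumes the file above is saved as `config.yaml` in the current directory and that PyYAML is installed:
```python
import yaml  # requires PyYAML (pip install pyyaml)

# Load the export config and print the fields described below, so typos in
# key names are caught before the conversion is launched.
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

for key in ("model_path", "output_folder", "mean", "std",
            "outputs_nodes", "do_quantization", "dataset"):
    print(f"{key}: {cfg.get(key)}")
```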
### config parameters
* model_path: Path where the model is stored
* output_folder: Name of the folder where the converted model is saved
* target_platform: The device the model runs on; only RK3588 or RK3568 are allowed
* normalize: Configures the normalize operation on the NPU, with two parameters: std and mean
  * std: If normalize is done externally, set this to [1/255,1/255,1/255]
  * mean: If normalize is done externally, set this to [0,0,0]
* outputs: List of output nodes; set to None to use the default output nodes
#### model_path
The path of the ONNX model to be converted to RKNN.
```yaml
model_path: "./scrfd_500m_bnkps_shape640x640.onnx"
```
#### output_folder
The folder path where the resulting RKNN model file is saved.
```yaml
output_folder: "./"
```
#### std and mean
If normalize needs to run on the NPU, configure these parameters and multiply the values by 255 yourself. For example, if the mean in your normalize is [0.5,0.5,0.5],
then the mean in the configuration file should be set to [128.5,128.5,128.5]. Convert [128.5,128.5,128.5] into YAML format yourself, as follows:
```yaml
mean:
  -
    - 128.5
    - 128.5
    - 128.5
std:
  -
    - 128.5
    - 128.5
    - 128.5
```
Of course, if normalize and permute are performed externally, these two parameters do not need to be configured.
#### outputs_nodes
The names of the output nodes. When the whole model is exported, this parameter does not need to be configured.
```yaml
outputs_nodes:
```
#### do_quantization and dataset
do_quantization indicates whether to perform static quantization, and dataset is the image dataset used during static quantization.
The two parameters are used together: dataset only takes effect when do_quantization is enabled.
```yaml
do_quantization: True
dataset: "./datasets.txt"
```
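If you need to build the dataset list yourself, a small helper like the one below can generate it. This is a sketch based on the assumption that the dataset file is a plain-text list of image paths, one per line (the convention used by rknn-toolkit2); the folder name `./quant_images` is only an example:
```python
import os

# Collect quantization calibration images into datasets.txt,
# writing one image path per line.
image_dir = "./quant_images"  # example folder, replace with your own images
with open("datasets.txt", "w") as f:
    for name in sorted(os.listdir(image_dir)):
        if name.lower().endswith((".jpg", ".jpeg", ".png")):
            f.write(os.path.join(image_dir, name) + "\n")
```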
## How to convert the model
Run the following command from the repository root:
@@ -47,4 +93,4 @@ python tools/export.py --config_path=./config.yaml
## Notes on model export
* Do not export models containing the softmax or argmax operators; these two operators have bugs, so perform those operations externally.
* Exporting the softmax and argmax operators is not recommended.

View File

@@ -15,7 +15,7 @@ English | [中文](../../cn/build_and_install/README.md)
- [Build and Install on RV1126 Platform](rv1126.md)
- [Build and Install on RK3588 Platform](rknpu2.md)
- [Build and Install on A311D Platform](a311d.md)
- [Build and Install on KunlunXin XPU Platform](xpu.md)
- [Build and Install on KunlunXin XPU Platform](kunlunxin.md)
## Build options
@@ -29,7 +29,7 @@ English | [中文](../../cn/build_and_install/README.md)
| ENABLE_VISION | Default OFF, whether to enable vision models deployment module |
| ENABLE_TEXT | Default OFF, whether to enable text models deployment module |
| WITH_GPU | Default OFF, if build on GPU, this needs to be ON |
| WITH_XPU | Default OFF, if deploy on KunlunXin XPU, this needs to be ON |
| WITH_KUNLUNXIN | Default OFF, if deploy on KunlunXin XPU, this needs to be ON |
| WITH_TIMVX | Default OFF, if deploy on RV1126/RV1109/A311D, this needs to be ON |
| WITH_ASCEND | Default OFF, if deploy on Huawei Ascend, this needs to be ON |
| CUDA_DIRECTORY | Default /usr/local/cuda, if build on GPU, this defines the path of CUDA(>=11.2) |

View File

@@ -1,4 +1,4 @@
English | [中文](../../cn/build_and_install/xpu.md)
English | [中文](../../cn/build_and_install/kunlunxin.md)
# How to Build KunlunXin XPU Deployment Environment
@@ -10,7 +10,7 @@ The relevant compilation options are described as follows:
|Compile Options|Default Values|Description|Remarks|
|:---|:---|:---|:---|
| ENABLE_LITE_BACKEND | OFF | It needs to be set to ON when compiling the RK library| - |
| WITH_XPU | OFF | It needs to be set to ON when compiling the KunlunXin XPU library| - |
| WITH_KUNLUNXIN | OFF | It needs to be set to ON when compiling the KunlunXin XPU library| - |
| ENABLE_ORT_BACKEND | OFF | whether to integrate ONNX Runtime backend | - |
| ENABLE_PADDLE_BACKEND | OFF | whether to integrate Paddle Inference backend | - |
| ENABLE_OPENVINO_BACKEND | OFF | whether to integrate OpenVINO backend | - |
@@ -44,11 +44,11 @@ cd FastDeploy
mkdir build && cd build
# CMake configuration with KunlunXin xpu toolchain
cmake -DWITH_XPU=ON \
cmake -DWITH_KUNLUNXIN=ON \
-DWITH_GPU=OFF \
-DENABLE_ORT_BACKEND=ON \
-DENABLE_PADDLE_BACKEND=ON \
-DCMAKE_INSTALL_PREFIX=fastdeploy-xpu \
-DCMAKE_INSTALL_PREFIX=fastdeploy-kunlunxin \
-DENABLE_VISION=ON \
-DOPENCV_DIRECTORY=/usr/lib/x86_64-linux-gnu/cmake/opencv4 \
..
@@ -57,14 +57,14 @@ cmake -DWITH_XPU=ON \
make -j8
make install
```
After the compilation is complete, the fastdeploy-xpu directory will be generated, indicating that the Paddle Lite based FastDeploy library has been compiled.
After the compilation is complete, the fastdeploy-kunlunxin directory will be generated, indicating that the Paddle Lite based FastDeploy library has been compiled.
## Python compile
The compilation command is as follows:
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
export WITH_XPU=ON
export WITH_KUNLUNXIN=ON
export WITH_GPU=OFF
export ENABLE_ORT_BACKEND=ON
export ENABLE_PADDLE_BACKEND=ON

View File

@@ -60,6 +60,7 @@ mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
-DWITH_TIMVX=ON \
-DTARGET_ABI=armhf \
-DENABLE_FLYCV=ON \ # Whether to enable FlyCV optimization
-DCMAKE_INSTALL_PREFIX=fastdeploy-timvx \
-DENABLE_VISION=ON \ # Whether to compile the vision module
-Wno-dev ..

View File

@@ -41,7 +41,7 @@ python infer.py --model_dir stable-diffusion-v1-4/ --scheduler "pndm" --backend
python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle
# Inference on KunlunXin XPU
python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle-xpu
python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle-kunlunxin
```
#### Parameter description
@@ -52,7 +52,7 @@ python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral"
|----------|--------------|
| --model_dir | Directory of the exported model. |
| --model_format | Model format. Defaults to `'paddle'`; options: `['paddle', 'onnx']`. |
| --backend | Inference runtime backend. Defaults to `paddle`; options: `['onnx_runtime', 'paddle', 'paddle-xpu']`. When the model format is `onnx`, the only option is `['onnx_runtime']`. |
| --backend | Inference runtime backend. Defaults to `paddle`; options: `['onnx_runtime', 'paddle', 'paddle-kunlunxin']`. When the model format is `onnx`, the only option is `['onnx_runtime']`. |
| --scheduler | Scheduler of the StableDiffusion model. Defaults to `'pndm'`; options: `['pndm', 'euler_ancestral']`. For the scheduler corresponding to each StableDiffusion model, see the [ppdiffusers model list](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/ppdiffusers/examples/textual_inversion). |
| --unet_model_prefix | Prefix of the UNet model. Defaults to `unet`. |
| --vae_model_prefix | Prefix of the VAE model. Defaults to `vae_decoder`. |

View File

@@ -69,7 +69,7 @@ def parse_arguments():
type=str,
default='paddle',
# Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
choices=['onnx_runtime', 'paddle', 'paddle-xpu'],
choices=['onnx_runtime', 'paddle', 'paddle-kunlunxin'],
help="The inference runtime backend of unet model and text encoder model."
)
parser.add_argument(
@@ -175,9 +175,9 @@ def create_trt_runtime(model_dir,
return fd.Runtime(option)
def create_xpu_runtime(model_dir, model_prefix, device_id=0):
def create_kunlunxin_runtime(model_dir, model_prefix, device_id=0):
option = fd.RuntimeOption()
option.use_xpu(
option.use_kunlunxin(
device_id,
l3_workspace_size=(64 * 1024 * 1024 - 4 * 1024),
locked=False,
@@ -306,18 +306,18 @@ if __name__ == "__main__":
dynamic_shape=unet_dynamic_shape,
device_id=args.device_id)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
elif args.backend == "paddle-xpu":
elif args.backend == "paddle-kunlunxin":
print("=== build text_encoder_runtime")
text_encoder_runtime = create_xpu_runtime(
text_encoder_runtime = create_kunlunxin_runtime(
args.model_dir,
args.text_encoder_model_prefix,
device_id=args.device_id)
print("=== build vae_decoder_runtime")
vae_decoder_runtime = create_xpu_runtime(
vae_decoder_runtime = create_kunlunxin_runtime(
args.model_dir, args.vae_model_prefix, device_id=args.device_id)
print("=== build unet_runtime")
start = time.time()
unet_runtime = create_xpu_runtime(
unet_runtime = create_kunlunxin_runtime(
args.model_dir, args.unet_model_prefix, device_id=args.device_id)
print(f"Spend {time.time() - start : .2f} s to load unet model.")
pipe = StableDiffusionFastDeployPipeline(

View File

@@ -35,8 +35,8 @@ tar xvfz ernie-3.0-medium-zh-afqmc.tgz
# GPU Inference
./seq_cls_infer_demo --device gpu --model_dir ernie-3.0-medium-zh-afqmc
# XPU Inference
./seq_cls_infer_demo --device xpu --model_dir ernie-3.0-medium-zh-afqmc
# KunlunXin XPU Inference
./seq_cls_infer_demo --device kunlunxin --model_dir ernie-3.0-medium-zh-afqmc
```
The result returned after running is as follows
```bash

View File

@@ -32,7 +32,7 @@ const char sep = '/';
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_string(vocab_path, "", "Path of the vocab file.");
DEFINE_string(device, "cpu",
"Type of inference device, support 'cpu', 'xpu' or 'gpu'.");
"Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.");
DEFINE_string(backend, "onnx_runtime",
"The inference runtime backend, support: ['onnx_runtime', "
"'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']");
@@ -61,8 +61,8 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
<< ", param_path = " << param_path << std::endl;
option->SetModelPath(model_path, param_path);
if (FLAGS_device == "xpu") {
option->UseXpu();
if (FLAGS_device == "kunlunxin") {
option->UseKunlunXin();
return true;
} else if (FLAGS_device == "gpu") {
option->UseGpu();

View File

@@ -40,8 +40,8 @@ python seq_cls_infer.py --device cpu --model_dir ernie-3.0-medium-zh-afqmc
# GPU Inference
python seq_cls_infer.py --device gpu --model_dir ernie-3.0-medium-zh-afqmc
# XPU Inference
python seq_cls_infer.py --device xpu --model_dir ernie-3.0-medium-zh-afqmc
# KunlunXin XPU Inference
python seq_cls_infer.py --device kunlunxin --model_dir ernie-3.0-medium-zh-afqmc
```
The result returned after running is as follows:

View File

@@ -35,8 +35,8 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
choices=['gpu', 'cpu', 'xpu'],
help="Type of inference device, support 'cpu', 'xpu' or 'gpu'.")
choices=['gpu', 'cpu', 'kunlunxin'],
help="Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.")
parser.add_argument(
"--backend",
type=str,
@@ -94,8 +94,8 @@ class ErnieForSequenceClassificationPredictor(object):
model_path = os.path.join(args.model_dir, "infer.pdmodel")
params_path = os.path.join(args.model_dir, "infer.pdiparams")
option.set_model_path(model_path, params_path)
if args.device == 'xpu':
option.use_xpu()
if args.device == 'kunlunxin':
option.use_kunlunxin()
option.use_paddle_lite_backend()
return fd.Runtime(option)
if args.device == 'cpu':

View File

@@ -51,16 +51,16 @@ models
```bash
# x.y.z represent image versions. Please refer to the serving document to replace them with numbers
# GPU Image
docker pull paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
# CPU Image
docker pull paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
# Running
docker run -it --net=host --name fastdeploy_server --shm-size="1g" -v /path/serving/models:/models paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10 bash
docker run -it --net=host --name fastdeploy_server --shm-size="1g" -v /path/serving/models:/models registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10 bash
```
## Deployment Models
The serving directory contains the configuration to start the pipeline service and the code to send the prediction request, including
The serving directory contains the configuration to start the pipeline service and the code to send the prediction request, including
```
models # Model repository required to start the service, containing the models and service configuration files
@@ -70,7 +70,7 @@ token_cls_rpc_client.py # 序列标注任务发送pipeline预测请求的脚
*Attention*: When starting the service, each Python backend process of the Server requests 64M of memory by default, and a Docker container started with default settings cannot run more than one Python backend node. There are two solutions:
- 1.Set the `shm-size` parameter when starting the container, for example, `docker run -it --net=host --name fastdeploy_server --shm-size="1g" -v /path/serving/models:/models paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash`
- 1.Set the `shm-size` parameter when starting the container, for example, `docker run -it --net=host --name fastdeploy_server --shm-size="1g" -v /path/serving/models:/models registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash`
- 2.Set the `shm-default-byte-size` parameter of python backend when starting the service. Set the default memory of python backend to 10M `tritonserver --model-repository=/models --backend-config=python,shm-default-byte-size=10485760`
### Classification Task

View File

@@ -51,12 +51,12 @@ models
```bash
# x.y.z is the image version number; replace it with digits according to the serving document
# GPU image
docker pull paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
# CPU image
docker pull paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
# Run
docker run -it --net=host --name fastdeploy_server --shm-size="1g" -v /path/serving/models:/models paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10 bash
docker run -it --net=host --name fastdeploy_server --shm-size="1g" -v /path/serving/models:/models registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10 bash
```
## Deployment Models
@@ -69,7 +69,7 @@ token_cls_rpc_client.py # 序列标注任务发送pipeline预测请求的脚
```
*Note*: When starting the service, each Python backend process of the Server requests `64M` of memory by default, and a Docker container started with default settings cannot run more than one Python backend node. There are two solutions:
- 1. Set the `shm-size` parameter when starting the container, for example: `docker run -it --net=host --name fastdeploy_server --shm-size="1g" -v /path/serving/models:/models paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash`
- 1. Set the `shm-size` parameter when starting the container, for example: `docker run -it --net=host --name fastdeploy_server --shm-size="1g" -v /path/serving/models:/models registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash`
- 2. Set the `shm-default-byte-size` parameter of the Python backend when starting the service, setting the Python backend's default memory to 10M: `tritonserver --model-repository=/models --backend-config=python,shm-default-byte-size=10485760`
### Classification Task

View File

@@ -96,13 +96,13 @@ void IpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << res.Str() << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "inference.pdmodel";
auto params_file = model_dir + sep + "inference.pdiparams";
auto config_file = model_dir + sep + "inference_cls.yaml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::classification::PaddleClasModel(
model_file, params_file, config_file, option);
if (!model.Initialized()) {
@@ -179,7 +179,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_demo ./ResNet50_vd ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with ipu; 4: run with xpu."
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with ipu; 4: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -193,7 +193,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 3) {
IpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 4) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 5) {
AscendInfer(argv[1], argv[2]);
}

View File

@@ -25,8 +25,8 @@ python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg -
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --use_trt True --topk 1
# IPU inference (note: the first IPU inference run serializes the model, which takes some time; please be patient)
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device ipu --topk 1
# XPU inference
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device xpu --topk 1
# KunlunXin XPU inference
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device kunlunxin --topk 1
# Huawei Ascend NPU inference
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device ascend --topk 1
```

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' or 'gpu' or 'ipu' or 'xpu' or 'ascend' ."
help="Type of inference device, support 'cpu' or 'gpu' or 'ipu' or 'kunlunxin' or 'ascend' ."
)
parser.add_argument(
"--use_trt",
@@ -36,8 +36,8 @@ def build_option(args):
if args.device.lower() == "ipu":
option.use_ipu()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "ascend":
option.use_ascend()

View File

@@ -20,19 +20,11 @@ install(TARGETS infer_demo DESTINATION ./)
install(DIRECTORY models DESTINATION ./)
install(DIRECTORY images DESTINATION ./)
# install(DIRECTORY run_with_adb.sh DESTINATION ./)
file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*)
install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib)
file(GLOB OPENCV_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib/lib*)
install(PROGRAMS ${OPENCV_LIBS} DESTINATION lib)
file(GLOB PADDLELITE_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddlelite/lib/lib*)
install(PROGRAMS ${PADDLELITE_LIBS} DESTINATION lib)
file(GLOB TIMVX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddlelite/lib/verisilicon_timvx/*)
install(PROGRAMS ${TIMVX_LIBS} DESTINATION lib)
file(GLOB_RECURSE FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/lib*.so*)
file(GLOB_RECURSE ALL_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/lib*.so*)
list(APPEND ALL_LIBS ${FASTDEPLOY_LIBS})
install(PROGRAMS ${ALL_LIBS} DESTINATION lib)
file(GLOB ADB_TOOLS run_with_adb.sh)
install(PROGRAMS ${ADB_TOOLS} DESTINATION ./)

View File

@@ -23,7 +23,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "inference.pdmodel";
auto params_file = model_dir + sep + "inference.pdiparams";
auto config_file = model_dir + sep + "inference_cls.yaml";
fastdeploy::vision::EnableFlyCV();
fastdeploy::RuntimeOption option;
option.UseTimVX();

View File

@@ -26,12 +26,12 @@ mv ResNet50_vd_infer/inference.pdiparams models/runtime/1/model.pdiparams
# Pull the fastdeploy image (x.y.z is the image version number; replace it with digits according to the serving document)
# GPU image
docker pull paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
# CPU image
docker pull paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
# Run the container. The container is named fd_serving, and the current directory is mounted as the container's /serving directory
nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
# Start the service (without setting the CUDA_VISIBLE_DEVICES environment variable, the service has scheduling rights over all GPU cards)
CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models --backend-config=python,shm-default-byte-size=10485760

View File

@@ -0,0 +1,14 @@
PROJECT(infer_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
# Specifies the path to the fastdeploy library after you have downloaded it
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# Include the FastDeploy dependency header file
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
# Add the FastDeploy library dependency
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})

View File

@@ -0,0 +1,87 @@
# FastestDet C++部署示例
本目录下提供`infer.cc`快速完成FastestDet在CPU/GPU以及GPU上通过TensorRT加速部署的示例。
在部署前,需确认以下两个步骤
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. 根据开发环境下载预编译部署库和samples代码参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
以Linux上CPU推理为例在本目录执行如下命令即可完成编译测试
```bash
mkdir build
cd build
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.3.tgz
tar xvf fastdeploy-linux-x64-1.0.3.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-1.0.3
make -j
#下载官方转换好的FastestDet模型文件和测试图片
wget https://bj.bcebos.com/paddlehub/fastdeploy/FastestDet.onnx
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
# CPU推理
./infer_demo FastestDet.onnx 000000014439.jpg 0
# GPU推理
./infer_demo FastestDet.onnx 000000014439.jpg 1
# GPU上TensorRT推理
./infer_demo FastestDet.onnx 000000014439.jpg 2
```
运行完成可视化结果如下图所示
<img width="640" src="https://user-images.githubusercontent.com/44280887/206176291-61eb118b-391b-4431-b79e-a393b9452138.jpg">
以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考:
- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md)
## FastestDet C++接口
### FastestDet类
```c++
fastdeploy::vision::detection::FastestDet(
const string& model_file,
const string& params_file = "",
const RuntimeOption& runtime_option = RuntimeOption(),
const ModelFormat& model_format = ModelFormat::ONNX)
```
FastestDet模型加载和初始化其中model_file为导出的ONNX模型格式。
**参数**
> * **model_file**(str): 模型文件路径
> * **params_file**(str): 参数文件路径当模型格式为ONNX时此参数传入空字符串即可
> * **runtime_option**(RuntimeOption): 后端推理配置默认为None即采用默认配置
> * **model_format**(ModelFormat): 模型格式默认为ONNX格式
#### Predict函数
> ```c++
> FastestDet::Predict(cv::Mat* im, DetectionResult* result,
> float conf_threshold = 0.65,
> float nms_iou_threshold = 0.45)
> ```
>
> 模型预测接口,输入图像直接输出检测结果。
>
> **参数**
>
> > * **im**: 输入图像注意需为HWCBGR格式
> > * **result**: 检测结果,包括检测框,各个框的置信度, DetectionResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/)
> > * **conf_threshold**: 检测框置信度过滤阈值
> > * **nms_iou_threshold**: NMS处理过程中iou阈值
### 类成员变量
#### 预处理参数
用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果
> > * **size**(vector&lt;int&gt;): 通过此参数修改预处理过程中resize的大小包含两个整型元素表示[width, height], 默认值为[352, 352]
- [模型介绍](../../)
- [Python部署](../python)
- [视觉模型预测结果](../../../../../docs/api/vision_results/)
- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,105 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
void CpuInfer(const std::string& model_file, const std::string& image_file) {
auto model = fastdeploy::vision::detection::FastestDet(model_file);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void GpuInfer(const std::string& model_file, const std::string& image_file) {
auto option = fastdeploy::RuntimeOption();
option.UseGpu();
auto model = fastdeploy::vision::detection::FastestDet(model_file, "", option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void TrtInfer(const std::string& model_file, const std::string& image_file) {
auto option = fastdeploy::RuntimeOption();
option.UseGpu();
option.UseTrtBackend();
option.SetTrtInputShape("images", {1, 3, 352, 352});
auto model = fastdeploy::vision::detection::FastestDet(model_file, "", option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
int main(int argc, char* argv[]) {
if (argc < 4) {
std::cout << "Usage: infer_demo path/to/model path/to/image run_option, "
"e.g ./infer_model ./FastestDet.onnx ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend."
<< std::endl;
return -1;
}
if (std::atoi(argv[3]) == 0) {
CpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 2) {
TrtInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -0,0 +1,74 @@
# FastestDet Python部署示例
在部署前,需确认以下两个步骤
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. FastDeploy Python whl包安装参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
本目录下提供`infer.py`快速完成FastestDet在CPU/GPU以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成
```bash
#下载部署示例代码
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/detection/fastestdet/python/
#下载fastestdet模型文件和测试图片
wget https://bj.bcebos.com/paddlehub/fastdeploy/FastestDet.onnx
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
# CPU推理
python infer.py --model FastestDet.onnx --image 000000014439.jpg --device cpu
# GPU推理
python infer.py --model FastestDet.onnx --image 000000014439.jpg --device gpu
# GPU上使用TensorRT推理
python infer.py --model FastestDet.onnx --image 000000014439.jpg --device gpu --use_trt True
```
运行完成可视化结果如下图所示
<img width="640" src="https://user-images.githubusercontent.com/44280887/206176291-61eb118b-391b-4431-b79e-a393b9452138.jpg">
## FastestDet Python接口
```python
fastdeploy.vision.detection.FastestDet(model_file, params_file=None, runtime_option=None, model_format=ModelFormat.ONNX)
```
FastestDet模型加载和初始化其中model_file为导出的ONNX模型格式
**参数**
> * **model_file**(str): 模型文件路径
> * **params_file**(str): 参数文件路径当模型格式为ONNX格式时此参数无需设定
> * **runtime_option**(RuntimeOption): 后端推理配置默认为None即采用默认配置
> * **model_format**(ModelFormat): 模型格式默认为ONNX
### predict函数
> ```python
> FastestDet.predict(image_data)
> ```
>
> 模型预测接口,输入图像直接输出检测结果。
>
> **参数**
>
> > * **image_data**(np.ndarray): 输入数据注意需为HWCBGR格式
> **返回**
>
> > 返回`fastdeploy.vision.DetectionResult`结构体,结构体说明参考文档[视觉模型预测结果](../../../../../docs/api/vision_results/)
### Class member attributes
#### Preprocessing parameters
Users can modify the following preprocessing parameters according to their needs, which affects the final inference and deployment results (see the sketch after this list).
> > * **size**(list[int]): This parameter changes the target size of the resize step during preprocessing; it contains two integers representing [width, height] and defaults to [352, 352]
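A minimal sketch of adjusting this attribute before prediction, assuming the Python binding exposes `size` directly as documented above (the value [416, 416] is only illustrative and must match what the exported model expects):
```python
import cv2
import fastdeploy as fd

# Load the model and override the preprocessing resize target.
model = fd.vision.detection.FastestDet("FastestDet.onnx")
model.size = [416, 416]  # per the `size` member attribute documented above

im = cv2.imread("000000014439.jpg")
result = model.predict(im)
print(result)
```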
## 其它文档
- [FastestDet 模型介绍](..)
- [FastestDet C++部署](../cpp)
- [模型预测结果说明](../../../../../docs/api/vision_results/)
- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md)

View File

@@ -0,0 +1,51 @@
import fastdeploy as fd
import cv2
def parse_arguments():
import argparse
import ast
parser = argparse.ArgumentParser()
parser.add_argument(
"--model", required=True, help="Path of FastestDet onnx model.")
parser.add_argument(
"--image", required=True, help="Path of test image file.")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
default=False,
help="Wether to use tensorrt.")
return parser.parse_args()
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "gpu":
option.use_gpu()
if args.use_trt:
option.use_trt_backend()
option.set_trt_input_shape("images", [1, 3, 352, 352])
return option
args = parse_arguments()
# Configure runtime and load model
runtime_option = build_option(args)
model = fd.vision.detection.FastestDet(args.model, runtime_option=runtime_option)
# Predict picture detection results
im = cv2.imread(args.image)
result = model.predict(im)
# Visualization of prediction results
vis_im = fd.vision.vis_detection(im, result)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")

View File

@@ -47,12 +47,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu(0, 0, false, false);
option.UseKunlunXin(0, 0, false, false);
auto model = fastdeploy::vision::detection::FasterRCNN(
model_file, params_file, config_file, option);
if (!model.Initialized()) {
@@ -109,7 +109,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./faster_rcnn_r50_vd_fpn_2x_coco ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with xpu."
"with gpu; 2: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -119,7 +119,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 2) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -47,12 +47,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu(0, 0, false, false);
option.UseKunlunXin(0, 0, false, false);
auto model = fastdeploy::vision::detection::MaskRCNN(model_file, params_file,
config_file, option);
if (!model.Initialized()) {
@@ -109,7 +109,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./mask_rcnn_r50_1x_coco/ ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with xpu."
"with gpu; 2: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -119,7 +119,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 2) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 3) {
std::cout
<< "Backend::TRT has not been supported yet, will skip this inference."

View File

@@ -47,12 +47,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::PicoDet(model_file, params_file,
config_file, option);
if (!model.Initialized()) {
@@ -138,7 +138,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./picodet_model_dir ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -150,7 +150,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 2) {
TrtInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 3) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -47,12 +47,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::PPYOLO(model_file, params_file,
config_file, option);
if (!model.Initialized()) {
@@ -109,7 +109,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with xpu."
"with gpu; 2: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -119,7 +119,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 2) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -47,12 +47,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
config_file, option);
if (!model.Initialized()) {
@@ -138,7 +138,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -150,7 +150,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 2) {
TrtInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 3) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -48,12 +48,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::RTMDet(model_file, params_file,
config_file, option);
if (!model.Initialized()) {
@@ -141,7 +141,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu, 2: run with gpu and use tensorrt backend; 3: run with xpu."
"with gpu, 2: run with gpu and use tensorrt backend; 3: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -153,7 +153,7 @@ int main(int argc, char* argv[]) {
} else if(std::atoi(argv[3]) == 2) {
TrtInfer(argv[1], argv[2]);
} else if(std::atoi(argv[3]) == 3) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -48,12 +48,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::SSD(model_file, params_file,
config_file, option);
if (!model.Initialized()) {
@@ -111,7 +111,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./ssd_dirname ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with xpu."
"with gpu; 2: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -121,7 +121,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 2) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -47,12 +47,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::YOLOv3(model_file, params_file,
config_file, option);
if (!model.Initialized()) {
@@ -109,7 +109,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with xpu."
"with gpu; 2: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -119,7 +119,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 2) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -48,12 +48,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::PaddleYOLOv5(model_file, params_file,
config_file, option);
if (!model.Initialized()) {
@@ -142,7 +142,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -154,7 +154,7 @@ int main(int argc, char* argv[]) {
} else if(std::atoi(argv[3]) == 2){
TrtInfer(argv[1], argv[2]);
} else if(std::atoi(argv[3]) == 3){
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -48,12 +48,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::PaddleYOLOv6(model_file, params_file,
config_file, option);
if (!model.Initialized()) {
@@ -141,7 +141,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -153,7 +153,7 @@ int main(int argc, char* argv[]) {
} else if(std::atoi(argv[3]) == 2){
TrtInfer(argv[1], argv[2]);
} else if(std::atoi(argv[3]) == 3){
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -48,12 +48,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::PaddleYOLOv7(model_file, params_file,
config_file, option);
if (!model.Initialized()) {
@@ -140,7 +140,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -152,7 +152,7 @@ int main(int argc, char* argv[]) {
} else if(std::atoi(argv[3]) == 2){
TrtInfer(argv[1], argv[2]);
} else if(std::atoi(argv[3]) == 3){
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -47,12 +47,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::PaddleYOLOX(
model_file, params_file, config_file, option);
if (!model.Initialized()) {
@@ -138,7 +138,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./paddle_yolox_dirname ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu by tensorrt; 3: run with xpu."
"with gpu; 2: run with gpu by tensorrt; 3: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -150,7 +150,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 2) {
TrtInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 3) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -24,7 +24,7 @@ python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439
# TensorRT inference on GPU (note: the first TensorRT run serializes the model, which takes some time; please be patient)
python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device gpu --use_trt True
# KunlunXin XPU inference
python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device xpu
python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device kunlunxin
```
After running, the visualized result is shown below

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -29,8 +29,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu(autotune=False, l3_workspace_size=0)
if args.device.lower() == "kunlunxin":
option.use_kunlunxin(autotune=False, l3_workspace_size=0)
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -29,8 +29,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu(autotune=False, l3_workspace_size=0)
if args.device.lower() == "kunlunxin":
option.use_kunlunxin(autotune=False, l3_workspace_size=0)
if args.device.lower() == "gpu":
# option.use_gpu()

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -29,8 +29,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -29,8 +29,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -18,7 +18,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -30,8 +30,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -29,8 +29,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -17,14 +17,14 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
return parser.parse_args()
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -29,8 +29,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -29,8 +29,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -29,8 +29,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -29,8 +29,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -17,7 +17,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -29,8 +29,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -0,0 +1 @@
README_CN.md

View File

@@ -0,0 +1,36 @@
简体中文 | [English](README_EN.md)
# PaddleDetection Python轻量服务化部署示例
在部署前,需确认以下两个步骤
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. FastDeploy Python whl包安装参考[FastDeploy Python安装](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
服务端:
```bash
# 下载部署示例代码
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/paddledetection/python/serving
# 下载PPYOLOE模型文件如果不下载代码里会自动从hub下载
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
tar xvf ppyoloe_crn_l_300e_coco.tgz
# 启动服务可修改server.py中的配置项来指定硬件、后端等
# 可通过--host、--port指定IP和端口号
fastdeploy simple_serving --app server:app
```
客户端:
```bash
# 下载部署示例代码
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/paddledetection/python/serving
# 下载测试图片
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
# 请求服务获取推理结果如有必要请修改脚本中的IP和端口号
python client.py
```

View File

@@ -0,0 +1,36 @@
English | [简体中文](README_CN.md)
# PaddleDetection Python Simple Serving Demo
## Environment
- 1. Prepare environment and install FastDeploy Python whl, refer to [download_prebuilt_libraries](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
Server:
```bash
# Download demo code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/paddledetection/python/serving
# Download PPYOLOE model
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
tar xvf ppyoloe_crn_l_300e_coco.tgz
# Launch server, change the configurations in server.py to select hardware, backend, etc.
# and use --host, --port to specify IP and port
fastdeploy simple_serving --app server:app
```
Client:
```bash
# Download demo code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/paddledetection/python/serving
# Download test image
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
# Send request and get inference result (Please adapt the IP and port if necessary)
python client.py
```

View File

@@ -0,0 +1,23 @@
import requests
import json
import cv2
import fastdeploy as fd
from fastdeploy.serving.utils import cv2_to_base64
if __name__ == '__main__':
url = "http://127.0.0.1:8000/fd/ppyoloe"
headers = {"Content-Type": "application/json"}
im = cv2.imread("000000014439.jpg")
data = {"data": {"image": cv2_to_base64(im)}, "parameters": {}}
resp = requests.post(url=url, headers=headers, data=json.dumps(data))
if resp.status_code == 200:
r_json = json.loads(resp.json()["result"])
det_result = fd.vision.utils.json_to_detection(r_json)
vis_im = fd.vision.vis_detection(im, det_result, score_threshold=0.5)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")
else:
print("Error code:", resp.status_code)
print(resp.text)

View File

@@ -0,0 +1,38 @@
import fastdeploy as fd
from fastdeploy.serving.server import SimpleServer
import os
import logging
logging.getLogger().setLevel(logging.INFO)
# Configurations
model_dir = 'ppyoloe_crn_l_300e_coco'
device = 'cpu'
use_trt = False
# Prepare model
model_file = os.path.join(model_dir, "model.pdmodel")
params_file = os.path.join(model_dir, "model.pdiparams")
config_file = os.path.join(model_dir, "infer_cfg.yml")
# Setup runtime option to select hardware, backend, etc.
option = fd.RuntimeOption()
if device.lower() == 'gpu':
option.use_gpu()
if use_trt:
option.use_trt_backend()
option.set_trt_cache_file('ppyoloe.trt')
# Create model instance
model_instance = fd.vision.detection.PPYOLOE(
model_file=model_file,
params_file=params_file,
config_file=config_file,
runtime_option=option)
# Create server, setup REST API
app = SimpleServer()
app.register(
task_name="fd/ppyoloe",
model_handler=fd.serving.handler.VisionModelHandler,
predictor=model_instance)

View File

@@ -70,7 +70,8 @@ outputs: ['tmp_17','p2o.Concat.9']
# ONNX模型转RKNN模型
# 转换模型,模型将生成在picodet_s_320_coco_lcnet_non_postprocess目录下
python tools/rknpu2/export.py --config_path tools/rknpu2/config/RK3568/picodet_s_416_coco_lcnet.yaml
python tools/rknpu2/export.py --config_path tools/rknpu2/config/picodet_s_416_coco_lcnet.yaml \
--target_platform rk3588
```
### 修改模型运行时的配置文件

View File

@@ -20,19 +20,11 @@ install(TARGETS infer_demo DESTINATION ./)
install(DIRECTORY models DESTINATION ./)
install(DIRECTORY images DESTINATION ./)
# install(DIRECTORY run_with_adb.sh DESTINATION ./)
file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*)
install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib)
file(GLOB OPENCV_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib/lib*)
install(PROGRAMS ${OPENCV_LIBS} DESTINATION lib)
file(GLOB PADDLELITE_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddlelite/lib/lib*)
install(PROGRAMS ${PADDLELITE_LIBS} DESTINATION lib)
file(GLOB TIMVX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddlelite/lib/verisilicon_timvx/*)
install(PROGRAMS ${TIMVX_LIBS} DESTINATION lib)
file(GLOB_RECURSE FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/lib*.so*)
file(GLOB_RECURSE ALL_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/lib*.so*)
list(APPEND ALL_LIBS ${FASTDEPLOY_LIBS})
install(PROGRAMS ${ALL_LIBS} DESTINATION lib)
file(GLOB ADB_TOOLS run_with_adb.sh)
install(PROGRAMS ${ADB_TOOLS} DESTINATION ./)

View File

@@ -24,7 +24,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file) {
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto subgraph_file = model_dir + sep + "subgraph.txt";
fastdeploy::vision::EnableFlyCV();
fastdeploy::RuntimeOption option;
option.UseTimVX();
option.SetLiteSubgraphPartitionPath(subgraph_file);

View File

@@ -37,13 +37,13 @@ cp models/runtime/ppyoloe_runtime_config.pbtxt models/runtime/config.pbtxt
# 拉取fastdeploy镜像(x.y.z为镜像版本号需替换成fastdeploy版本数字)
# GPU镜像
docker pull paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
# CPU镜像
docker pull paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
# 运行容器.容器名字为 fd_serving, 并挂载当前目录为容器的 /serving 目录
nvidia-docker run -it --net=host --name fd_serving --shm-size="1g" -v `pwd`/:/serving paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
nvidia-docker run -it --net=host --name fd_serving --shm-size="1g" -v `pwd`/:/serving registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
# 启动服务(不设置CUDA_VISIBLE_DEVICES环境变量会拥有所有GPU卡的调度权限)
CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models

View File

@@ -29,7 +29,7 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
./infer_paddle_demo yolov5s_infer 000000014439.jpg 1
# GPU上TensorRT推理
./infer_paddle_demo yolov5s_infer 000000014439.jpg 2
# XPU推理
# 昆仑芯XPU推理
./infer_paddle_demo yolov5s_infer 000000014439.jpg 3
```

View File

@@ -102,11 +102,11 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
fastdeploy::RuntimeOption option;
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::detection::YOLOv5(
model_file, params_file, option, fastdeploy::ModelFormat::PADDLE);
@@ -148,7 +148,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 2) {
TrtInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 3) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -23,8 +23,8 @@ python infer.py --model yolov5s_infer --image 000000014439.jpg --device cpu
python infer.py --model yolov5s_infer --image 000000014439.jpg --device gpu
# GPU上使用TensorRT推理
python infer.py --model yolov5s_infer --image 000000014439.jpg --device gpu --use_trt True
# XPU推理
python infer.py --model yolov5s_infer --image 000000014439.jpg --device xpu
# 昆仑芯XPU推理
python infer.py --model yolov5s_infer --image 000000014439.jpg --device kunlunxin
```
运行完成可视化结果如下图所示

View File

@@ -14,7 +14,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' or 'gpu' or 'xpu'.")
help="Type of inference device, support 'cpu' or 'gpu' or 'kunlunxin'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -25,8 +25,8 @@ def parse_arguments():
def build_option(args):
option = fd.RuntimeOption()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.device.lower() == "gpu":
option.use_gpu()

View File

@@ -0,0 +1 @@
README_CN.md

View File

@@ -0,0 +1,36 @@
简体中文 | [English](README_EN.md)
# YOLOv5 Python轻量服务化部署示例
在部署前,需确认以下两个步骤
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. FastDeploy Python whl包安装参考[FastDeploy Python安装](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
服务端:
```bash
# 下载部署示例代码
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/yolov5/python/serving
# 下载模型文件
wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s_infer.tar
tar xvf yolov5s_infer.tar
# 启动服务可修改server.py中的配置项来指定硬件、后端等
# 可通过--host、--port指定IP和端口号
fastdeploy simple_serving --app server:app
```
客户端:
```bash
# 下载部署示例代码
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/yolov5/python/serving
# 下载测试图片
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
# 请求服务获取推理结果如有必要请修改脚本中的IP和端口号
python client.py
```

View File

@@ -0,0 +1,36 @@
English | [简体中文](README_CN.md)
# YOLOv5 Python Simple Serving Demo
## Environment
- 1. Prepare environment and install FastDeploy Python whl, refer to [download_prebuilt_libraries](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
Server:
```bash
# Download demo code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/yolov5/python/serving
# Download model
wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s_infer.tar
tar xvf yolov5s_infer.tar
# Launch server, change the configurations in server.py to select hardware, backend, etc.
# and use --host, --port to specify IP and port
fastdeploy simple_serving --app server:app
```
Client:
```bash
# Download demo code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/yolov5/python/serving
# Download test image
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
# Send request and get inference result (Please adapt the IP and port if necessary)
python client.py
```

View File

@@ -0,0 +1,23 @@
import requests
import json
import cv2
import fastdeploy as fd
from fastdeploy.serving.utils import cv2_to_base64
if __name__ == '__main__':
url = "http://127.0.0.1:8000/fd/yolov5s"
headers = {"Content-Type": "application/json"}
im = cv2.imread("000000014439.jpg")
data = {"data": {"image": cv2_to_base64(im)}, "parameters": {}}
resp = requests.post(url=url, headers=headers, data=json.dumps(data))
if resp.status_code == 200:
r_json = json.loads(resp.json()["result"])
det_result = fd.vision.utils.json_to_detection(r_json)
vis_im = fd.vision.vis_detection(im, det_result, score_threshold=0.5)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")
else:
print("Error code:", resp.status_code)
print(resp.text)

View File

@@ -0,0 +1,38 @@
import fastdeploy as fd
from fastdeploy.serving.server import SimpleServer
import os
import logging
logging.getLogger().setLevel(logging.INFO)
# Configurations
model_dir = 'yolov5s_infer'
device = 'cpu'
use_trt = False
# Prepare model
model_file = os.path.join(model_dir, "model.pdmodel")
params_file = os.path.join(model_dir, "model.pdiparams")
# Setup runtime option to select hardware, backend, etc.
option = fd.RuntimeOption()
if device.lower() == 'gpu':
option.use_gpu()
if use_trt:
option.use_trt_backend()
option.set_trt_input_shape("images", [1, 3, 640, 640])
option.set_trt_cache_file('yolov5s.trt')
# Create model instance
model_instance = fd.vision.detection.YOLOv5(
model_file,
params_file,
runtime_option=option,
model_format=fd.ModelFormat.PADDLE)
# Create server, setup REST API
app = SimpleServer()
app.register(
task_name="fd/yolov5s",
model_handler=fd.serving.handler.VisionModelHandler,
predictor=model_instance)

View File

@@ -21,17 +21,10 @@ install(TARGETS infer_demo DESTINATION ./)
install(DIRECTORY models DESTINATION ./)
install(DIRECTORY images DESTINATION ./)
file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*)
install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib)
file(GLOB OPENCV_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib/lib*)
install(PROGRAMS ${OPENCV_LIBS} DESTINATION lib)
file(GLOB PADDLELITE_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddlelite/lib/lib*)
install(PROGRAMS ${PADDLELITE_LIBS} DESTINATION lib)
file(GLOB TIMVX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddlelite/lib/verisilicon_timvx/*)
install(PROGRAMS ${TIMVX_LIBS} DESTINATION lib)
file(GLOB_RECURSE FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/lib*.so*)
file(GLOB_RECURSE ALL_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/lib*.so*)
list(APPEND ALL_LIBS ${FASTDEPLOY_LIBS})
install(PROGRAMS ${ALL_LIBS} DESTINATION lib)
file(GLOB ADB_TOOLS run_with_adb.sh)
install(PROGRAMS ${ADB_TOOLS} DESTINATION ./)

View File

@@ -23,7 +23,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto subgraph_file = model_dir + sep + "subgraph.txt";
fastdeploy::vision::EnableFlyCV();
fastdeploy::RuntimeOption option;
option.UseTimVX();
option.SetLiteSubgraphPartitionPath(subgraph_file);

View File

@@ -20,12 +20,12 @@ mv yolov5s.onnx models/runtime/1/model.onnx
# 拉取fastdeploy镜像(x.y.z为镜像版本号需参照serving文档替换为数字)
# GPU镜像
docker pull paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
# CPU镜像
docker pull paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
# 运行容器.容器名字为 fd_serving, 并挂载当前目录为容器的 /yolov5_serving 目录
nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/yolov5_serving paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/yolov5_serving registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
# 启动服务(不设置CUDA_VISIBLE_DEVICES环境变量会拥有所有GPU卡的调度权限)
CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/yolov5_serving/models --backend-config=python,shm-default-byte-size=10485760

View File

@@ -19,16 +19,16 @@ cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
#下载Paddle模型文件和测试图片
https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_infer.tar
wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_infer.tar
tar -xf yolov6s_infer.tar
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
# CPU推理
./infer_paddle_demo ./../yolov6s_infer 000000014439.jpg 0
./infer_paddle_demo yolov6s_infer 000000014439.jpg 0
# GPU推理
./infer_paddle_demo ./../yolov6s_infer 000000014439.jpg 1
# XPU推理
./infer_paddle_demo ./../yolov6s_infer 000000014439.jpg 2
./infer_paddle_demo yolov6s_infer 000000014439.jpg 1
# 昆仑芯XPU推理
./infer_paddle_demo yolov6s_infer 000000014439.jpg 2
```
如果想要验证ONNX模型的推理可以参考如下命令

View File

@@ -45,9 +45,9 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
fastdeploy::RuntimeOption option;
option.UseXpu();
option.UseKunlunXin();
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto model = fastdeploy::vision::detection::YOLOv6(model_file, params_file, option, fastdeploy::ModelFormat::PADDLE);
@@ -103,7 +103,7 @@ int main(int argc, char* argv[]) {
"e.g ./infer_model ./yolov6s_infer ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with xpu."
"with gpu; 2: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -113,7 +113,7 @@ int main(int argc, char* argv[]) {
} else if (std::atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 2) {
XpuInfer(argv[1], argv[2]);
KunlunXinInfer(argv[1], argv[2]);
}
return 0;
}

View File

@@ -12,7 +12,7 @@
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd examples/vision/detection/yolov6/python/
https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_infer.tar
wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_infer.tar
tar -xf yolov6s_infer.tar
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
@@ -20,8 +20,8 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --device cpu
# GPU推理
python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --device gpu
# XPU推理
python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --device xpu
# 昆仑芯XPU推理
python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg --device kunlunxin
```
如果想要验证ONNX模型的推理可以参考如下命令
```bash

examples/vision/detection/yolov6/python/infer.py Normal file → Executable file
View File

View File

@@ -16,7 +16,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu', 'xpu' or 'gpu'.")
help="Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.")
return parser.parse_args()
@@ -25,8 +25,8 @@ def build_option(args):
if args.device.lower() == "gpu":
option.use_gpu(0)
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
return option

View File

@@ -26,7 +26,7 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
./infer_paddle_model_demo yolov7_infer 000000014439.jpg 0
# GPU推理
./infer_paddle_model_demo yolov7_infer 000000014439.jpg 1
# XPU推理
# 昆仑芯XPU推理
./infer_paddle_model_demo yolov7_infer 000000014439.jpg 2
```
如果想要验证ONNX模型的推理可以参考如下命令

View File

@@ -52,7 +52,7 @@ int main(int argc, char* argv[]) {
<< std::endl;
std::cout << "The data type of run_option is int, 0: run on cpu with ORT "
"backend; 1: run "
"on gpu with TensorRT backend ; 2: run with xpu. "
"on gpu with TensorRT backend ; 2: run with kunlunxin. "
<< std::endl;
return -1;
}
@@ -67,7 +67,7 @@ int main(int argc, char* argv[]) {
option.UseGpu();
option.UseTrtBackend();
} else if (flag == 2) {
option.UseXpu();
option.UseKunlunXin();
}
std::string model_dir = argv[1];

View File

@@ -22,8 +22,8 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device cpu
# GPU推理
python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device gpu
# XPU推理
python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device xpu
# 昆仑芯XPU推理
python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device kunlunxin
```
如果想要验证ONNX模型的推理可以参考如下命令
```bash

View File

@@ -22,8 +22,8 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device cpu
# GPU
python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device gpu
# XPU
python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device xpu
# KunlunXin XPU
python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device kunlunxin
```
If you want to test ONNX model:
```bash

examples/vision/detection/yolov7/python/infer.py Normal file → Executable file
View File

View File

@@ -16,7 +16,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu', 'xpu' or 'gpu'.")
help="Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.")
return parser.parse_args()
@@ -25,8 +25,8 @@ def build_option(args):
if args.device.lower() == "gpu":
option.use_gpu(0)
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
return option

View File

@@ -1,6 +1,8 @@
# 人脸识别模型
## 模型支持列表
FastDeploy目前支持如下人脸识别模型部署
| 模型 | 说明 | 模型格式 | 版本 |
@@ -10,3 +12,7 @@ FastDeploy目前支持如下人脸识别模型部署
| [deepinsight/PartialFC](./insightface) | PartialFC 系列模型 | ONNX | [CommitID:babb9a5](https://github.com/deepinsight/insightface/commit/babb9a5) |
| [deepinsight/VPL](./insightface) | VPL 系列模型 | ONNX | [CommitID:babb9a5](https://github.com/deepinsight/insightface/commit/babb9a5) |
| [paddleclas/AdaFace](./adaface) | AdaFace 系列模型 | PADDLE | [CommitID:babb9a5](https://github.com/PaddlePaddle/PaddleClas/tree/v2.4.0) |
## 模型demo简介
ArcFace,CosFace,PartialFC,VPL同属于deepinsight系列因此demo使用ONNX作为推理框架。AdaFace则采用PaddleInference作为推理框架。
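A hedged Python sketch of the split described above: the deepinsight-series models are loaded from ONNX, while AdaFace is loaded from a Paddle inference model. The class names mirror the C++ demos in this diff (`fastdeploy::vision::faceid::ArcFace` / `AdaFace`); the exact Python constructor signatures are assumed to follow the other Python examples in this PR:

```python
import fastdeploy as fd

# deepinsight-series models (ArcFace / CosFace / PartialFC / VPL) ship as ONNX.
arcface = fd.vision.faceid.ArcFace("ms1mv3_arcface_r100.onnx")

# AdaFace ships as a Paddle inference model (separate model / params files).
adaface = fd.vision.faceid.AdaFace(
    "mobilefacenet_adaface/mobilefacenet_adaface.pdmodel",
    "mobilefacenet_adaface/mobilefacenet_adaface.pdiparams")
```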

View File

@@ -1,4 +1,4 @@
PROJECT(infer_demo C CXX)
PROJECT(infer_adaface_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.12)
# 指定下载解压后的fastdeploy库路径
@@ -9,5 +9,5 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# 添加FastDeploy依赖头文件
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
add_executable(infer_adaface_demo ${PROJECT_SOURCE_DIR}/infer.cc)
target_link_libraries(infer_adaface_demo ${FASTDEPLOY_LIBS})

View File

@@ -11,53 +11,40 @@
以Linux上CPU推理为例在本目录执行如下命令即可完成编译测试支持此模型需保证FastDeploy版本0.7.0以上(x.x.x>=0.7.0)
```bash
# “如果预编译库不包含本模型请从最新代码编译SDK”
mkdir build
cd build
# 下载FastDeploy预编译库用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
#下载测试图片
wget https://bj.bcebos.com/paddlehub/test_samples/test_lite_focal_arcface_0.JPG
wget https://bj.bcebos.com/paddlehub/test_samples/test_lite_focal_arcface_1.JPG
wget https://bj.bcebos.com/paddlehub/test_samples/test_lite_focal_arcface_2.JPG
wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/face_demo.zip
unzip face_demo.zip
# 如果为Paddle模型运行以下代码
wget https://bj.bcebos.com/paddlehub/fastdeploy/mobilefacenet_adaface.tgz
tar zxvf mobilefacenet_adaface.tgz -C ./
# CPU推理
./infer_demo mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
./infer_adaface_demo mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
mobilefacenet_adaface/mobilefacenet_adaface.pdiparams \
test_lite_focal_arcface_0.JPG \
test_lite_focal_arcface_1.JPG \
test_lite_focal_arcface_2.JPG \
0
face_0.jpg face_1.jpg face_2.jpg 0
# GPU推理
./infer_demo mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
./infer_adaface_demo mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
mobilefacenet_adaface/mobilefacenet_adaface.pdiparams \
test_lite_focal_arcface_0.JPG \
test_lite_focal_arcface_1.JPG \
test_lite_focal_arcface_2.JPG \
1
face_0.jpg face_1.jpg face_2.jpg 1
# GPU上TensorRT推理
./infer_demo mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
./infer_adaface_demo mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
mobilefacenet_adaface/mobilefacenet_adaface.pdiparams \
test_lite_focal_arcface_0.JPG \
test_lite_focal_arcface_1.JPG \
test_lite_focal_arcface_2.JPG \
2
face_0.jpg face_1.jpg face_2.jpg 2
# XPU推理
# 昆仑芯XPU推理
./infer_adaface_demo mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
mobilefacenet_adaface/mobilefacenet_adaface.pdiparams \
test_lite_focal_arcface_0.JPG \
test_lite_focal_arcface_1.JPG \
test_lite_focal_arcface_2.JPG \
3
face_0.jpg face_1.jpg face_2.jpg 3
```
运行完成可视化结果如下图所示
@@ -101,16 +88,22 @@ AdaFace模型加载和初始化如果使用PaddleInference推理model_file
> > * **im**: 输入图像注意需为HWCBGR格式
> > * **result**: 检测结果,包括检测框,各个框的置信度, FaceRecognitionResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/)
### 类成员变量
#### 预处理参数
用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果
### 修改预处理以及后处理的参数
预处理和后处理的参数的需要通过修改AdaFacePostprocessorAdaFacePreprocessor的成员变量来进行修改。
#### AdaFacePreprocessor成员变量(预处理参数)
> > * **size**(vector&lt;int&gt;): 通过此参数修改预处理过程中resize的大小包含两个整型元素表示[width, height], 默认值为[112, 112],
通过AdaFacePreprocessor::SetSize(std::vector<int>& size)来进行修改
> > * **alpha**(vector&lt;float&gt;): 预处理归一化的alpha值计算公式为`x'=x*alpha+beta`alpha默认为[1. / 127.5, 1.f / 127.5, 1. / 127.5],
通过AdaFacePreprocessor::SetAlpha(std::vector<float>& alpha)来进行修改
> > * **beta**(vector&lt;float&gt;): 预处理归一化的beta值计算公式为`x'=x*alpha+beta`beta默认为[-1.f, -1.f, -1.f],
通过AdaFacePreprocessor::SetBeta(std::vector<float>& beta)来进行修改
> > * **permute**(bool): 预处理是否将BGR转换成RGB默认true,
通过AdaFacePreprocessor::SetPermute(bool permute)来进行修改
> > * **size**(vector&lt;int&gt;): 通过此参数修改预处理过程中resize的大小包含两个整型元素表示[width, height], 默认值为[112, 112]
> > * **alpha**(vector&lt;float&gt;): 预处理归一化的alpha值计算公式为`x'=x*alpha+beta`alpha默认为[1. / 127.5, 1.f / 127.5, 1. / 127.5]
> > * **beta**(vector&lt;float&gt;): 预处理归一化的beta值计算公式为`x'=x*alpha+beta`beta默认为[-1.f, -1.f, -1.f]
> > * **swap_rb**(bool): 预处理是否将BGR转换成RGB默认true
> > * **l2_normalize**(bool): 输出人脸向量之前是否执行l2归一化默认false
#### AdaFacePostprocessor成员变量(后处理参数)
> > * **l2_normalize**(bool): 输出人脸向量之前是否执行l2归一化默认false,
AdaFacePostprocessor::SetL2Normalize(bool& l2_normalize)来进行修改
- [模型介绍](../../)
- [Python部署](../python)

View File

@@ -1,14 +1,17 @@
/***************************************************************************
*
* Copyright (c) 2021 Baidu.com, Inc. All Rights Reserved
*
**************************************************************************/
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
* @author Baidu
* @brief demo_image_inference
*
**/
#include "fastdeploy/vision.h"
void CpuInfer(const std::string &model_file, const std::string &params_file,
@@ -28,8 +31,8 @@ void CpuInfer(const std::string &model_file, const std::string &params_file,
fastdeploy::vision::FaceRecognitionResult res1;
fastdeploy::vision::FaceRecognitionResult res2;
if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
(!model.Predict(&face2, &res2))) {
if ((!model.Predict(face0, &res0)) || (!model.Predict(face1, &res1)) ||
(!model.Predict(face2, &res2))) {
std::cerr << "Prediction Failed." << std::endl;
}
@@ -40,17 +43,19 @@ void CpuInfer(const std::string &model_file, const std::string &params_file,
std::cout << "--- [Face 2]:" << res2.Str();
float cosine01 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res1.embedding, model.l2_normalize);
res0.embedding, res1.embedding,
model.GetPostprocessor().GetL2Normalize());
float cosine02 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res2.embedding, model.l2_normalize);
res0.embedding, res2.embedding,
model.GetPostprocessor().GetL2Normalize());
std::cout << "Detect Done! Cosine 01: " << cosine01
<< ", Cosine 02:" << cosine02 << std::endl;
}
void XpuInfer(const std::string &model_file, const std::string &params_file,
void KunlunXinInfer(const std::string &model_file, const std::string &params_file,
const std::vector<std::string> &image_file) {
auto option = fastdeploy::RuntimeOption();
option.UseXpu();
option.UseKunlunXin();
auto model = fastdeploy::vision::faceid::AdaFace(model_file, params_file);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
@@ -65,8 +70,8 @@ void XpuInfer(const std::string &model_file, const std::string &params_file,
fastdeploy::vision::FaceRecognitionResult res1;
fastdeploy::vision::FaceRecognitionResult res2;
if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
(!model.Predict(&face2, &res2))) {
if ((!model.Predict(face0, &res0)) || (!model.Predict(face1, &res1)) ||
(!model.Predict(face2, &res2))) {
std::cerr << "Prediction Failed." << std::endl;
}
@@ -77,9 +82,11 @@ void XpuInfer(const std::string &model_file, const std::string &params_file,
std::cout << "--- [Face 2]:" << res2.Str();
float cosine01 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res1.embedding, model.l2_normalize);
res0.embedding, res1.embedding,
model.GetPostprocessor().GetL2Normalize());
float cosine02 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res2.embedding, model.l2_normalize);
res0.embedding, res2.embedding,
model.GetPostprocessor().GetL2Normalize());
std::cout << "Detect Done! Cosine 01: " << cosine01
<< ", Cosine 02:" << cosine02 << std::endl;
}
@@ -103,8 +110,8 @@ void GpuInfer(const std::string &model_file, const std::string &params_file,
fastdeploy::vision::FaceRecognitionResult res1;
fastdeploy::vision::FaceRecognitionResult res2;
if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
(!model.Predict(&face2, &res2))) {
if ((!model.Predict(face0, &res0)) || (!model.Predict(face1, &res1)) ||
(!model.Predict(face2, &res2))) {
std::cerr << "Prediction Failed." << std::endl;
}
@@ -115,9 +122,11 @@ void GpuInfer(const std::string &model_file, const std::string &params_file,
std::cout << "--- [Face 2]:" << res2.Str();
float cosine01 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res1.embedding, model.l2_normalize);
res0.embedding, res1.embedding,
model.GetPostprocessor().GetL2Normalize());
float cosine02 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res2.embedding, model.l2_normalize);
res0.embedding, res2.embedding,
model.GetPostprocessor().GetL2Normalize());
std::cout << "Detect Done! Cosine 01: " << cosine01
<< ", Cosine 02:" << cosine02 << std::endl;
}
@@ -143,8 +152,8 @@ void TrtInfer(const std::string &model_file, const std::string &params_file,
fastdeploy::vision::FaceRecognitionResult res1;
fastdeploy::vision::FaceRecognitionResult res2;
if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
(!model.Predict(&face2, &res2))) {
if ((!model.Predict(face0, &res0)) || (!model.Predict(face1, &res1)) ||
(!model.Predict(face2, &res2))) {
std::cerr << "Prediction Failed." << std::endl;
}
@@ -155,9 +164,11 @@ void TrtInfer(const std::string &model_file, const std::string &params_file,
std::cout << "--- [Face 2]:" << res2.Str();
float cosine01 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res1.embedding, model.l2_normalize);
res0.embedding, res1.embedding,
model.GetPostprocessor().GetL2Normalize());
float cosine02 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res2.embedding, model.l2_normalize);
res0.embedding, res2.embedding,
model.GetPostprocessor().GetL2Normalize());
std::cout << "Detect Done! Cosine 01: " << cosine01
<< ", Cosine 02:" << cosine02 << std::endl;
}
@@ -171,7 +182,7 @@ int main(int argc, char *argv[]) {
"test_lite_focal_AdaFace_2.JPG 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with kunlunxin."
<< std::endl;
return -1;
}
@@ -185,7 +196,7 @@ int main(int argc, char *argv[]) {
} else if (std::atoi(argv[6]) == 2) {
TrtInfer(argv[1], argv[2], image_files);
} else if (std::atoi(argv[6]) == 3) {
CpuInfer(argv[1], argv[2], image_files);
KunlunXinInfer(argv[1], argv[2], image_files);
}
return 0;
}

View File

@@ -15,9 +15,8 @@ cd examples/vision/faceid/adaface/python/
#下载AdaFace模型文件和测试图片
#下载测试图片
wget https://bj.bcebos.com/paddlehub/test_samples/test_lite_focal_arcface_0.JPG
wget https://bj.bcebos.com/paddlehub/test_samples/test_lite_focal_arcface_1.JPG
wget https://bj.bcebos.com/paddlehub/test_samples/test_lite_focal_arcface_2.JPG
wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/face_demo.zip
unzip face_demo.zip
# 如果为Paddle模型运行以下代码
wget https://bj.bcebos.com/paddlehub/fastdeploy/mobilefacenet_adaface.tgz
@@ -26,33 +25,33 @@ tar zxvf mobilefacenet_adaface.tgz -C ./
# CPU推理
python infer.py --model mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
--params_file mobilefacenet_adaface/mobilefacenet_adaface.pdiparams \
--face test_lite_focal_arcface_0.JPG \
--face_positive test_lite_focal_arcface_1.JPG \
--face_negative test_lite_focal_arcface_2.JPG \
--face face_0.jpg \
--face_positive face_1.jpg \
--face_negative face_2.jpg \
--device cpu
# GPU推理
python infer.py --model mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
--params_file mobilefacenet_adaface/mobilefacenet_adaface.pdiparams \
--face test_lite_focal_arcface_0.JPG \
--face_positive test_lite_focal_arcface_1.JPG \
--face_negative test_lite_focal_arcface_2.JPG \
--face face_0.jpg \
--face_positive face_1.jpg \
--face_negative face_2.jpg \
--device gpu
# GPU上使用TensorRT推理
python infer.py --model mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
--params_file mobilefacenet_adaface/mobilefacenet_adaface.pdiparams \
--face test_lite_focal_arcface_0.JPG \
--face_positive test_lite_focal_arcface_1.JPG \
--face_negative test_lite_focal_arcface_2.JPG \
--device gpu \
--use_trt True
--face face_0.jpg \
--face_positive face_1.jpg \
--face_negative face_2.jpg \
--device gpu \
--use_trt True
# XPU推理
# 昆仑芯XPU推理
python infer.py --model mobilefacenet_adaface/mobilefacenet_adaface.pdmodel \
--params_file mobilefacenet_adaface/mobilefacenet_adaface.pdiparams \
--face test_lite_focal_arcface_0.JPG \
--face_positive test_lite_focal_arcface_1.JPG \
--face_negative test_lite_focal_arcface_2.JPG \
--device xpu
--device kunlunxin
```
运行完成可视化结果如下图所示
@@ -106,11 +105,15 @@ AdaFace模型加载和初始化其中model_file为导出的ONNX模型格式
#### 预处理参数
用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果
#### AdaFacePreprocessor的成员变量
以下变量为AdaFacePreprocessor的成员变量
> > * **size**(list[int]): 通过此参数修改预处理过程中resize的大小包含两个整型元素表示[width, height], 默认值为[112, 112]
> > * **alpha**(list[float]): 预处理归一化的alpha值计算公式为`x'=x*alpha+beta`alpha默认为[1. / 127.5, 1.f / 127.5, 1. / 127.5]
> > * **beta**(list[float]): 预处理归一化的beta值计算公式为`x'=x*alpha+beta`beta默认为[-1.f, -1.f, -1.f]
> > * **swap_rb**(bool): 预处理是否将BGR转换成RGB默认True
#### AdaFacePostprocessor的成员变量
以下变量为AdaFacePostprocessor的成员变量
> > * **l2_normalize**(bool): 输出人脸向量之前是否执行l2归一化默认False
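A hedged sketch of adjusting these members from Python. It assumes the model exposes the pre/post-processors as `model.preprocessor` and `model.postprocessor` (mirroring the C++ `GetPreprocessor()`/`GetPostprocessor()` accessors used elsewhere in this diff) and that the attribute names match the member list above; none of this is quoted from the PR itself:

```python
import fastdeploy as fd

model = fd.vision.faceid.AdaFace(
    "mobilefacenet_adaface/mobilefacenet_adaface.pdmodel",
    "mobilefacenet_adaface/mobilefacenet_adaface.pdiparams")

# AdaFacePreprocessor members listed above.
model.preprocessor.size = [112, 112]   # resize target, [width, height]
model.preprocessor.swap_rb = True      # convert BGR input to RGB

# AdaFacePostprocessor member listed above.
model.postprocessor.l2_normalize = True  # L2-normalize the output embedding
```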

View File

@@ -39,7 +39,7 @@ def parse_arguments():
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' , 'xpu' or 'gpu'.")
help="Type of inference device, support 'cpu' , 'kunlunxin' or 'gpu'.")
parser.add_argument(
"--use_trt",
type=ast.literal_eval,
@@ -54,8 +54,8 @@ def build_option(args):
if args.device.lower() == "gpu":
option.use_gpu()
if args.device.lower() == "xpu":
option.use_xpu()
if args.device.lower() == "kunlunxin":
option.use_kunlunxin()
if args.use_trt:
option.use_trt_backend()

View File

@@ -7,12 +7,11 @@
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. 根据开发环境下载预编译部署库和samples代码参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
以Linux上CPU推理为例在本目录执行如下命令即可完成编译测试支持此模型需保证FastDeploy版本0.7.0以上(x.x.x>=0.7.0)
以Linux上CPU推理为例在本目录执行如下命令即可完成编译测试
```bash
mkdir build
cd build
# 下载FastDeploy预编译库用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
@@ -20,17 +19,15 @@ make -j
#下载官方转换好的ArcFace模型文件和测试图片
wget https://bj.bcebos.com/paddlehub/fastdeploy/ms1mv3_arcface_r100.onnx
wget https://bj.bcebos.com/paddlehub/test_samples/test_lite_focal_arcface_0.JPG
wget https://bj.bcebos.com/paddlehub/test_samples/test_lite_focal_arcface_1.JPG
wget https://bj.bcebos.com/paddlehub/test_samples/test_lite_focal_arcface_2.JPG
wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/face_demo.zip
unzip face_demo.zip
# CPU推理
./infer_arcface_demo ms1mv3_arcface_r100.onnx test_lite_focal_arcface_0.JPG test_lite_focal_arcface_1.JPG test_lite_focal_arcface_2.JPG 0
./infer_arcface_demo ms1mv3_arcface_r100.onnx face_0.jpg face_1.jpg face_2.jpg 0
# GPU推理
./infer_arcface_demo ms1mv3_arcface_r100.onnx test_lite_focal_arcface_0.JPG test_lite_focal_arcface_1.JPG test_lite_focal_arcface_2.JPG 1
./infer_arcface_demo ms1mv3_arcface_r100.onnx face_0.jpg face_1.jpg face_2.jpg 1
# GPU上TensorRT推理
./infer_arcface_demo ms1mv3_arcface_r100.onnx test_lite_focal_arcface_0.JPG test_lite_focal_arcface_1.JPG test_lite_focal_arcface_2.JPG 2
./infer_arcface_demo ms1mv3_arcface_r100.onnx face_0.jpg face_1.jpg face_2.jpg 2
```
运行完成可视化结果如下图所示
@@ -113,16 +110,22 @@ VPL模型加载和初始化其中model_file为导出的ONNX模型格式。
> > * **im**: 输入图像注意需为HWCBGR格式
> > * **result**: 检测结果,包括检测框,各个框的置信度, FaceRecognitionResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/)
### 类成员变量
#### 预处理参数
用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果
### 修改预处理以及后处理的参数
预处理和后处理的参数的需要通过修改InsightFaceRecognitionPostprocessorInsightFaceRecognitionPreprocessor的成员变量来进行修改。
#### InsightFaceRecognitionPreprocessor成员变量(预处理参数)
> > * **size**(vector&lt;int&gt;): 通过此参数修改预处理过程中resize的大小包含两个整型元素表示[width, height], 默认值为[112, 112],
通过InsightFaceRecognitionPreprocessor::SetSize(std::vector<int>& size)来进行修改
> > * **alpha**(vector&lt;float&gt;): 预处理归一化的alpha值计算公式为`x'=x*alpha+beta`alpha默认为[1. / 127.5, 1.f / 127.5, 1. / 127.5],
通过InsightFaceRecognitionPreprocessor::SetAlpha(std::vector<float>& alpha)来进行修改
> > * **beta**(vector&lt;float&gt;): 预处理归一化的beta值计算公式为`x'=x*alpha+beta`beta默认为[-1.f, -1.f, -1.f],
通过InsightFaceRecognitionPreprocessor::SetBeta(std::vector<float>& beta)来进行修改
> > * **permute**(bool): 预处理是否将BGR转换成RGB默认true,
通过InsightFaceRecognitionPreprocessor::SetPermute(bool permute)来进行修改
> > * **size**(vector&lt;int&gt;): 通过此参数修改预处理过程中resize的大小包含两个整型元素表示[width, height], 默认值为[112, 112]
> > * **alpha**(vector&lt;float&gt;): 预处理归一化的alpha值计算公式为`x'=x*alpha+beta`alpha默认为[1. / 127.5, 1.f / 127.5, 1. / 127.5]
> > * **beta**(vector&lt;float&gt;): 预处理归一化的beta值计算公式为`x'=x*alpha+beta`beta默认为[-1.f, -1.f, -1.f]
> > * **swap_rb**(bool): 预处理是否将BGR转换成RGB默认true
> > * **l2_normalize**(bool): 输出人脸向量之前是否执行l2归一化默认false
#### InsightFaceRecognitionPostprocessor成员变量(后处理参数)
> > * **l2_normalize**(bool): 输出人脸向量之前是否执行l2归一化默认false,
InsightFaceRecognitionPostprocessor::SetL2Normalize(bool& l2_normalize)来进行修改
- [模型介绍](../../)
- [Python部署](../python)
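The demos above compare two faces by the cosine similarity of their embeddings (the C++ code calls `fastdeploy::vision::utils::CosineSimilarity`). A NumPy sketch of the same computation on the `embedding` field of two `FaceRecognitionResult`s; the 512-dimensional random vectors only stand in for real model outputs:

```python
import numpy as np


def cosine_similarity(a, b):
    """Cosine similarity between two face embeddings (plain float lists)."""
    a = np.asarray(a, dtype=np.float32)
    b = np.asarray(b, dtype=np.float32)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))


# res0.embedding / res1.embedding would come from FaceRecognitionResult objects;
# random vectors stand in here (512 dims is an assumption for illustration).
emb0 = np.random.rand(512)
emb1 = np.random.rand(512)
print("Cosine 01:", cosine_similarity(emb0, emb1))
```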

View File

@@ -16,11 +16,7 @@
void CpuInfer(const std::string& model_file,
const std::vector<std::string>& image_file) {
auto model = fastdeploy::vision::faceid::ArcFace(model_file);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto model = fastdeploy::vision::faceid::ArcFace(model_file, "");
cv::Mat face0 = cv::imread(image_file[0]);
cv::Mat face1 = cv::imread(image_file[1]);
@@ -30,8 +26,8 @@ void CpuInfer(const std::string& model_file,
fastdeploy::vision::FaceRecognitionResult res1;
fastdeploy::vision::FaceRecognitionResult res2;
if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
(!model.Predict(&face2, &res2))) {
if ((!model.Predict(face0, &res0)) || (!model.Predict(face1, &res1)) ||
(!model.Predict(face2, &res2))) {
std::cerr << "Prediction Failed." << std::endl;
}
@@ -42,9 +38,11 @@ void CpuInfer(const std::string& model_file,
std::cout << "--- [Face 2]:" << res2.Str();
float cosine01 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res1.embedding, model.l2_normalize);
res0.embedding, res1.embedding,
model.GetPostprocessor().GetL2Normalize());
float cosine02 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res2.embedding, model.l2_normalize);
res0.embedding, res2.embedding,
model.GetPostprocessor().GetL2Normalize());
std::cout << "Detect Done! Cosine 01: " << cosine01
<< ", Cosine 02:" << cosine02 << std::endl;
}
@@ -67,8 +65,8 @@ void GpuInfer(const std::string& model_file,
fastdeploy::vision::FaceRecognitionResult res1;
fastdeploy::vision::FaceRecognitionResult res2;
if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
(!model.Predict(&face2, &res2))) {
if ((!model.Predict(face0, &res0)) || (!model.Predict(face1, &res1)) ||
(!model.Predict(face2, &res2))) {
std::cerr << "Prediction Failed." << std::endl;
}
@@ -79,9 +77,11 @@ void GpuInfer(const std::string& model_file,
std::cout << "--- [Face 2]:" << res2.Str();
float cosine01 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res1.embedding, model.l2_normalize);
res0.embedding, res1.embedding,
model.GetPostprocessor().GetL2Normalize());
float cosine02 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res2.embedding, model.l2_normalize);
res0.embedding, res2.embedding,
model.GetPostprocessor().GetL2Normalize());
std::cout << "Detect Done! Cosine 01: " << cosine01
<< ", Cosine 02:" << cosine02 << std::endl;
}
@@ -106,8 +106,8 @@ void TrtInfer(const std::string& model_file,
fastdeploy::vision::FaceRecognitionResult res1;
fastdeploy::vision::FaceRecognitionResult res2;
if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
(!model.Predict(&face2, &res2))) {
if ((!model.Predict(face0, &res0)) || (!model.Predict(face1, &res1)) ||
(!model.Predict(face2, &res2))) {
std::cerr << "Prediction Failed." << std::endl;
}
@@ -118,9 +118,11 @@ void TrtInfer(const std::string& model_file,
std::cout << "--- [Face 2]:" << res2.Str();
float cosine01 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res1.embedding, model.l2_normalize);
res0.embedding, res1.embedding,
model.GetPostprocessor().GetL2Normalize());
float cosine02 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res2.embedding, model.l2_normalize);
res0.embedding, res2.embedding,
model.GetPostprocessor().GetL2Normalize());
std::cout << "Detect Done! Cosine 01: " << cosine01
<< ", Cosine 02:" << cosine02 << std::endl;
}
@@ -129,8 +131,7 @@ int main(int argc, char* argv[]) {
if (argc < 6) {
std::cout << "Usage: infer_demo path/to/model path/to/image run_option, "
"e.g ./infer_arcface_demo ms1mv3_arcface_r100.onnx "
"test_lite_focal_arcface_0.JPG test_lite_focal_arcface_1.JPG "
"test_lite_focal_arcface_2.JPG 0"
"face_0.jpg face_1.jpg face_2.jpg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend."

View File

@@ -16,11 +16,7 @@
void CpuInfer(const std::string& model_file,
const std::vector<std::string>& image_file) {
auto model = fastdeploy::vision::faceid::CosFace(model_file);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto model = fastdeploy::vision::faceid::CosFace(model_file, "");
cv::Mat face0 = cv::imread(image_file[0]);
cv::Mat face1 = cv::imread(image_file[1]);
@@ -30,8 +26,8 @@ void CpuInfer(const std::string& model_file,
fastdeploy::vision::FaceRecognitionResult res1;
fastdeploy::vision::FaceRecognitionResult res2;
if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
(!model.Predict(&face2, &res2))) {
if ((!model.Predict(face0, &res0)) || (!model.Predict(face1, &res1)) ||
(!model.Predict(face2, &res2))) {
std::cerr << "Prediction Failed." << std::endl;
}
@@ -42,9 +38,11 @@ void CpuInfer(const std::string& model_file,
std::cout << "--- [Face 2]:" << res2.Str();
float cosine01 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res1.embedding, model.l2_normalize);
res0.embedding, res1.embedding,
model.GetPostprocessor().GetL2Normalize());
float cosine02 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res2.embedding, model.l2_normalize);
res0.embedding, res2.embedding,
model.GetPostprocessor().GetL2Normalize());
std::cout << "Detect Done! Cosine 01: " << cosine01
<< ", Cosine 02:" << cosine02 << std::endl;
}
@@ -67,8 +65,8 @@ void GpuInfer(const std::string& model_file,
fastdeploy::vision::FaceRecognitionResult res1;
fastdeploy::vision::FaceRecognitionResult res2;
if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
(!model.Predict(&face2, &res2))) {
if ((!model.Predict(face0, &res0)) || (!model.Predict(face1, &res1)) ||
(!model.Predict(face2, &res2))) {
std::cerr << "Prediction Failed." << std::endl;
}
@@ -79,9 +77,11 @@ void GpuInfer(const std::string& model_file,
std::cout << "--- [Face 2]:" << res2.Str();
float cosine01 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res1.embedding, model.l2_normalize);
res0.embedding, res1.embedding,
model.GetPostprocessor().GetL2Normalize());
float cosine02 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res2.embedding, model.l2_normalize);
res0.embedding, res2.embedding,
model.GetPostprocessor().GetL2Normalize());
std::cout << "Detect Done! Cosine 01: " << cosine01
<< ", Cosine 02:" << cosine02 << std::endl;
}
@@ -106,8 +106,8 @@ void TrtInfer(const std::string& model_file,
fastdeploy::vision::FaceRecognitionResult res1;
fastdeploy::vision::FaceRecognitionResult res2;
if ((!model.Predict(&face0, &res0)) || (!model.Predict(&face1, &res1)) ||
(!model.Predict(&face2, &res2))) {
if ((!model.Predict(face0, &res0)) || (!model.Predict(face1, &res1)) ||
(!model.Predict(face2, &res2))) {
std::cerr << "Prediction Failed." << std::endl;
}
@@ -118,9 +118,11 @@ void TrtInfer(const std::string& model_file,
std::cout << "--- [Face 2]:" << res2.Str();
float cosine01 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res1.embedding, model.l2_normalize);
res0.embedding, res1.embedding,
model.GetPostprocessor().GetL2Normalize());
float cosine02 = fastdeploy::vision::utils::CosineSimilarity(
res0.embedding, res2.embedding, model.l2_normalize);
res0.embedding, res2.embedding,
model.GetPostprocessor().GetL2Normalize());
std::cout << "Detect Done! Cosine 01: " << cosine01
<< ", Cosine 02:" << cosine02 << std::endl;
}
@@ -128,9 +130,8 @@ void TrtInfer(const std::string& model_file,
int main(int argc, char* argv[]) {
if (argc < 6) {
std::cout << "Usage: infer_demo path/to/model path/to/image run_option, "
"e.g ./infer_arcface_demo ms1mv3_arcface_r100.onnx "
"test_lite_focal_arcface_0.JPG test_lite_focal_arcface_1.JPG "
"test_lite_focal_arcface_2.JPG 0"
"e.g ./infer_cosface_demo ms1mv3_cosface_r100.onnx "
"face_0.jpg face_1.jpg face_2.jpg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend."

Some files were not shown because too many files have changed in this diff.