From 576b2e3cb0806fd8750cc94571cc8939754340dc Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Tue, 7 Feb 2023 09:06:06 +0000 Subject: [PATCH 01/41] Update PaddleSeg example directory --- docs/api/vision_results/README_CN.md | 3 + docs/api/vision_results/faq_CN.md | 25 ++++ .../vision_results/segmentation_result_CN.md | 2 + docs/cn/build_and_install/a311d.md | 10 ++ docs/cn/build_and_install/rv1126.md | 10 ++ .../segmentation/paddleseg/README_CN.md | 60 +++----- .../segmentation/paddleseg/a311d/README_CN.md | 12 -- .../paddleseg/{ => amlogic}/a311d/README.md | 4 +- .../paddleseg/amlogic/a311d/README_CN.md | 20 +++ .../{ => amlogic}/a311d/cpp/CMakeLists.txt | 0 .../{ => amlogic}/a311d/cpp/README.md | 0 .../{ => amlogic}/a311d/cpp/README_CN.md | 20 +-- .../{ => amlogic}/a311d/cpp/infer.cc | 6 +- .../{ => amlogic}/a311d/cpp/run_with_adb.sh | 0 .../paddleseg/cpu-gpu/README_CN.md | 48 +++++++ .../{ => cpu-gpu}/cpp/CMakeLists.txt | 0 .../paddleseg/{ => cpu-gpu}/cpp/README.md | 8 +- .../paddleseg/cpu-gpu/cpp/README_CN.md | 106 ++++++++++++++ .../paddleseg/cpu-gpu/cpp/infer.cc | 131 ++++++++++++++++++ .../paddleseg/{ => cpu-gpu}/python/README.md | 6 +- .../paddleseg/cpu-gpu/python/README_CN.md | 88 ++++++++++++ .../paddleseg/cpu-gpu/python/infer.py | 57 ++++++++ .../paddleseg/kunlun/README_CN.md | 48 +++++++ .../paddleseg/kunlun/cpp/CMakeLists.txt | 14 ++ .../paddleseg/kunlun/cpp/README.md | 96 +++++++++++++ .../paddleseg/{ => kunlun}/cpp/README_CN.md | 0 .../paddleseg/{ => kunlun}/cpp/infer.cc | 6 +- .../paddleseg/kunlun/python/README.md | 82 +++++++++++ .../{ => kunlun}/python/README_CN.md | 0 .../paddleseg/{ => kunlun}/python/infer.py | 0 .../{ => kunlun}/python/serving/README.md | 0 .../{ => kunlun}/python/serving/README_CN.md | 0 .../{ => kunlun}/python/serving/client.py | 0 .../{ => kunlun}/python/serving/server.py | 0 .../paddleseg/quantize/README_CN.md | 39 ++---- .../paddleseg/{ => rockchip}/rknpu2/README.md | 0 .../{ => rockchip}/rknpu2/README_CN.md | 0 .../{ => rockchip}/rknpu2/cpp/CMakeLists.txt | 2 +- .../{ => rockchip}/rknpu2/cpp/README.md | 2 +- .../{ => rockchip}/rknpu2/cpp/README_CN.md | 0 .../{ => rockchip}/rknpu2/cpp/infer.cc | 17 +-- .../{ => rockchip}/rknpu2/pp_humanseg.md | 2 +- .../{ => rockchip}/rknpu2/pp_humanseg_EN.md | 2 +- .../{ => rockchip}/rknpu2/python/README.md | 0 .../{ => rockchip}/rknpu2/python/README_CN.md | 0 .../{ => rockchip}/rknpu2/python/infer.py | 0 .../paddleseg/{ => rockchip}/rv1126/README.md | 2 +- .../{ => rockchip}/rv1126/README_CN.md | 0 .../{ => rockchip}/rv1126/cpp/CMakeLists.txt | 0 .../{ => rockchip}/rv1126/cpp/README.md | 0 .../{ => rockchip}/rv1126/cpp/README_CN.md | 0 .../{ => rockchip}/rv1126/cpp/infer.cc | 6 +- .../{ => rockchip}/rv1126/cpp/run_with_adb.sh | 0 .../paddleseg/serving/README_CN.md | 71 +--------- .../deploy.yaml | 0 .../{ => fastdeploy_serving}/README.md | 0 .../serving/fastdeploy_serving/README_CN.md | 86 ++++++++++++ .../models/paddleseg/1/README.md | 0 .../models/paddleseg/config.pbtxt | 0 .../models/postprocess/1/model.py | 0 .../models/postprocess/config.pbtxt | 0 .../models/preprocess/1/model.py | 0 .../models/preprocess/config.pbtxt | 0 .../models/runtime/1/README.md | 0 .../models/runtime/config.pbtxt | 0 .../paddleseg_grpc_client.py | 0 .../serving/simple_serving/README.md | 36 +++++ .../serving/simple_serving/README_CN.md | 32 +++++ .../serving/simple_serving/client.py | 23 +++ .../serving/simple_serving/server.py | 38 +++++ .../segmentation/paddleseg/sophgo/README.md | 9 +- 
.../paddleseg/sophgo/README_CN.md | 9 +- 72 files changed, 1050 insertions(+), 188 deletions(-) create mode 100644 docs/api/vision_results/faq_CN.md delete mode 100644 examples/vision/segmentation/paddleseg/a311d/README_CN.md rename examples/vision/segmentation/paddleseg/{ => amlogic}/a311d/README.md (78%) create mode 100644 examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md rename examples/vision/segmentation/paddleseg/{ => amlogic}/a311d/cpp/CMakeLists.txt (100%) rename examples/vision/segmentation/paddleseg/{ => amlogic}/a311d/cpp/README.md (100%) rename examples/vision/segmentation/paddleseg/{ => amlogic}/a311d/cpp/README_CN.md (70%) rename examples/vision/segmentation/paddleseg/{ => amlogic}/a311d/cpp/infer.cc (95%) mode change 100755 => 100644 rename examples/vision/segmentation/paddleseg/{ => amlogic}/a311d/cpp/run_with_adb.sh (100%) create mode 100644 examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md rename examples/vision/segmentation/paddleseg/{ => cpu-gpu}/cpp/CMakeLists.txt (100%) rename examples/vision/segmentation/paddleseg/{ => cpu-gpu}/cpp/README.md (98%) create mode 100644 examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md create mode 100644 examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc rename examples/vision/segmentation/paddleseg/{ => cpu-gpu}/python/README.md (97%) create mode 100644 examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md create mode 100755 examples/vision/segmentation/paddleseg/cpu-gpu/python/infer.py create mode 100644 examples/vision/segmentation/paddleseg/kunlun/README_CN.md create mode 100644 examples/vision/segmentation/paddleseg/kunlun/cpp/CMakeLists.txt create mode 100755 examples/vision/segmentation/paddleseg/kunlun/cpp/README.md rename examples/vision/segmentation/paddleseg/{ => kunlun}/cpp/README_CN.md (100%) rename examples/vision/segmentation/paddleseg/{ => kunlun}/cpp/infer.cc (97%) mode change 100755 => 100644 create mode 100755 examples/vision/segmentation/paddleseg/kunlun/python/README.md rename examples/vision/segmentation/paddleseg/{ => kunlun}/python/README_CN.md (100%) rename examples/vision/segmentation/paddleseg/{ => kunlun}/python/infer.py (100%) rename examples/vision/segmentation/paddleseg/{ => kunlun}/python/serving/README.md (100%) rename examples/vision/segmentation/paddleseg/{ => kunlun}/python/serving/README_CN.md (100%) rename examples/vision/segmentation/paddleseg/{ => kunlun}/python/serving/client.py (100%) rename examples/vision/segmentation/paddleseg/{ => kunlun}/python/serving/server.py (100%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rknpu2/README.md (100%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rknpu2/README_CN.md (100%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rknpu2/cpp/CMakeLists.txt (96%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rknpu2/cpp/README.md (99%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rknpu2/cpp/README_CN.md (100%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rknpu2/cpp/infer.cc (87%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rknpu2/pp_humanseg.md (99%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rknpu2/pp_humanseg_EN.md (99%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rknpu2/python/README.md (100%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rknpu2/python/README_CN.md (100%) rename examples/vision/segmentation/paddleseg/{ => 
rockchip}/rknpu2/python/infer.py (100%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rv1126/README.md (92%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rv1126/README_CN.md (100%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rv1126/cpp/CMakeLists.txt (100%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rv1126/cpp/README.md (100%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rv1126/cpp/README_CN.md (100%) rename examples/vision/segmentation/paddleseg/{ => rockchip}/rv1126/cpp/infer.cc (95%) mode change 100755 => 100644 rename examples/vision/segmentation/paddleseg/{ => rockchip}/rv1126/cpp/run_with_adb.sh (100%) rename examples/vision/segmentation/paddleseg/serving/{models/preprocess/1 => fastdeploy_serving/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer}/deploy.yaml (100%) rename examples/vision/segmentation/paddleseg/serving/{ => fastdeploy_serving}/README.md (100%) create mode 100644 examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md rename examples/vision/segmentation/paddleseg/serving/{ => fastdeploy_serving}/models/paddleseg/1/README.md (100%) rename examples/vision/segmentation/paddleseg/serving/{ => fastdeploy_serving}/models/paddleseg/config.pbtxt (100%) rename examples/vision/segmentation/paddleseg/serving/{ => fastdeploy_serving}/models/postprocess/1/model.py (100%) rename examples/vision/segmentation/paddleseg/serving/{ => fastdeploy_serving}/models/postprocess/config.pbtxt (100%) rename examples/vision/segmentation/paddleseg/serving/{ => fastdeploy_serving}/models/preprocess/1/model.py (100%) rename examples/vision/segmentation/paddleseg/serving/{ => fastdeploy_serving}/models/preprocess/config.pbtxt (100%) rename examples/vision/segmentation/paddleseg/serving/{ => fastdeploy_serving}/models/runtime/1/README.md (100%) rename examples/vision/segmentation/paddleseg/serving/{ => fastdeploy_serving}/models/runtime/config.pbtxt (100%) rename examples/vision/segmentation/paddleseg/serving/{ => fastdeploy_serving}/paddleseg_grpc_client.py (100%) create mode 100644 examples/vision/segmentation/paddleseg/serving/simple_serving/README.md create mode 100644 examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md create mode 100644 examples/vision/segmentation/paddleseg/serving/simple_serving/client.py create mode 100644 examples/vision/segmentation/paddleseg/serving/simple_serving/server.py diff --git a/docs/api/vision_results/README_CN.md b/docs/api/vision_results/README_CN.md index 94efce21e..a6ad2a808 100755 --- a/docs/api/vision_results/README_CN.md +++ b/docs/api/vision_results/README_CN.md @@ -16,3 +16,6 @@ FastDeploy根据视觉模型的任务类型,定义了不同的结构体(`fastd | OCRResult | [C++/Python文档](./ocr_result.md) | 文本框检测,分类和文本识别返回结果 | OCR系列模型等 | | MOTResult | [C++/Python文档](./mot_result.md) | 多目标跟踪返回结果 | pptracking系列模型等 | | HeadPoseResult | [C++/Python文档](./headpose_result.md) | 头部姿态估计返回结果 | FSANet系列模型等 | + +## 常见问题 +- [如何将视觉模型预测结果转换为numpy格式](./faq_CN.md) diff --git a/docs/api/vision_results/faq_CN.md b/docs/api/vision_results/faq_CN.md new file mode 100644 index 000000000..59ce781d8 --- /dev/null +++ b/docs/api/vision_results/faq_CN.md @@ -0,0 +1,25 @@ +[English](faq.md)| 简体中文 +# 视觉模型预测结果常见问题 + +## 将视觉模型预测结果转换为numpy格式 + +这里以[SegmentationResult](./segmentation_result_CN.md)为例,展示如何抽取SegmentationResult中的label_map或者score_map来转为numpy格式,同时也可以利用已有数据new SegmentationResult结构体 +``` +import fastdeploy as fd +import cv2 +import numpy as np + +model = fd.vision.segmentation.PaddleSegModel( + 
model_file, params_file, config_file) +im = cv2.imread(image) +result = model.predict(im) +# convert label_map and score_map to numpy format +numpy_label_map = np.array(result.label_map) +numpy_score_map = np.array(result.score_map) + +# create SegmentationResult object +result = fd.C.vision.SegmentationResult() +result.label_map = numpy_label_map.tolist() +result.score_map = numpy_score_map.tolist() +``` +>> **注意**: 以上为示例代码,具体请参考[PaddleSeg example](../../../examples/vision/segmentation/paddleseg/) diff --git a/docs/api/vision_results/segmentation_result_CN.md b/docs/api/vision_results/segmentation_result_CN.md index 5b393c6ea..0b3e22ee1 100644 --- a/docs/api/vision_results/segmentation_result_CN.md +++ b/docs/api/vision_results/segmentation_result_CN.md @@ -14,6 +14,7 @@ struct SegmentationResult { std::vector shape; bool contain_score_map = false; void Clear(); + void Free(); std::string Str(); }; ``` @@ -22,6 +23,7 @@ struct SegmentationResult { - **score_map**: 成员变量,与label_map一一对应的所预测的分割类别概率值(当导出模型时指定`--output_op argmax`)或者经过softmax归一化化后的概率值(当导出模型时指定`--output_op softmax`或者导出模型时指定`--output_op none`同时模型初始化的时候设置模型[类成员属性](../../../examples/vision/segmentation/paddleseg/cpp/)`apply_softmax=True`) - **shape**: 成员变量,表示输出图片的shape,为H\*W - **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Free()**: 成员函数,用于清除结构体中存储的结果并释放内存 - **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) ## Python 定义 diff --git a/docs/cn/build_and_install/a311d.md b/docs/cn/build_and_install/a311d.md index 766be6954..7066ca9d5 100755 --- a/docs/cn/build_and_install/a311d.md +++ b/docs/cn/build_and_install/a311d.md @@ -2,6 +2,16 @@ # 晶晨 A311D 部署环境编译安装 +## 导航目录 + +* [简介以及编译选项](#简介以及编译选项) +* [交叉编译环境搭建](#交叉编译环境搭建) +* [基于 Paddle Lite 的 FastDeploy 交叉编译库编译](#基于-paddle-lite-的-fastdeploy-交叉编译库编译) +* [准备设备运行环境](#准备设备运行环境) +* [基于 FastDeploy 在 A311D 上的部署示例](#基于-fastdeploy-在-a311d-上的部署示例) + +## 简介以及编译选项 + FastDeploy 基于 Paddle Lite 后端支持在晶晨 NPU 上进行部署推理。 更多详细的信息请参考:[Paddle Lite部署示例](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/verisilicon_timvx.html)。 diff --git a/docs/cn/build_and_install/rv1126.md b/docs/cn/build_and_install/rv1126.md index 159ac00e4..090393e1d 100755 --- a/docs/cn/build_and_install/rv1126.md +++ b/docs/cn/build_and_install/rv1126.md @@ -2,6 +2,16 @@ # 瑞芯微 RV1126 部署环境编译安装 +## 导航目录 + +* [简介以及编译选项](#简介以及编译选项) +* [交叉编译环境搭建](#交叉编译环境搭建) +* [基于 Paddle Lite 的 FastDeploy 交叉编译库编译](#基于-paddle-lite-的-fastdeploy-交叉编译库编译) +* [准备设备运行环境](#准备设备运行环境) +* [基于 FastDeploy 在 RV1126 上的部署示例](#基于-fastdeploy-在-rv1126-上的部署示例) + +## 简介以及编译选项 + FastDeploy基于 Paddle Lite 后端支持在瑞芯微(Rockchip)Soc 上进行部署推理。 更多详细的信息请参考:[Paddle Lite部署示例](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/verisilicon_timvx.html)。 diff --git a/examples/vision/segmentation/paddleseg/README_CN.md b/examples/vision/segmentation/paddleseg/README_CN.md index 0b0cda349..28bdce086 100644 --- a/examples/vision/segmentation/paddleseg/README_CN.md +++ b/examples/vision/segmentation/paddleseg/README_CN.md @@ -1,47 +1,23 @@ -# PaddleSeg 模型部署 +# 使用FastDeploy部署PaddleSeg模型 -## 模型版本说明 +## FastDeploy介绍 -- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) +FastDeploy是一款全场景、易用灵活、极致高效的AI推理部署工具,使用FastDeploy可以简单高效的在10+款硬件上对PaddleSeg模型进行快速部署 -目前FastDeploy支持如下模型的部署 +## 详细文档 -- [U-Net系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/unet/README.md) -- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/pp_liteseg/README.md) -- 
[PP-HumanSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/contrib/PP-HumanSeg/README.md) -- [FCN系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/fcn/README.md) -- [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/deeplabv3/README.md) +- [NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU](cpu-gpu) +- [昆仑](kunlun) +- [升腾](ascend) +- [瑞芯微](rockchip) +- [晶晨](amlogic) +- [算能](sophgo) +- [Android ARM CPU部署](android) +- [服务化Serving部署](serving) +- [模型自动化压缩工具](quantize) +- [web部署](web) -【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../matting) - -## 准备PaddleSeg部署模型 - -PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) - -**注意** -- PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 - -## 下载预训练模型 - -为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型 -- without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` -- with-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op argmax` - -开发者可直接下载使用。 - -| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | -|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | -| [Unet-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_with_argmax_infer.tgz) \| [Unet-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | 52MB | 1024x512 | 65.00% | 66.02% | 66.89% | -| [PP-LiteSeg-B(STDC2)-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz) \| [PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz) | 31MB | 1024x512 | 79.04% | 79.52% | 79.85% | -|[PP-HumanSegV1-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV1_Lite_with_argmax_infer.tgz) \| [PP-HumanSegV1-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) | 543KB | 192x192 | 86.2% | - | - | -|[PP-HumanSegV2-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_infer.tgz) | 12MB | 192x192 | 92.52% | - | - | -| [PP-HumanSegV2-Mobile-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Mobile-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) | 29MB | 192x192 | 93.13% | - | - | -|[PP-HumanSegV1-Server-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_with_argmax_infer.tgz) \| [PP-HumanSegV1-Server-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) | 103MB | 512x512 | 96.47% | - | - | -| [Portait-PP-HumanSegV2-Lite-with-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer.tgz) \| [Portait-PP-HumanSegV2-Lite-without-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) | 3.6M | 256x144 | 96.63% | - | - | -| 
[FCN-HRNet-W18-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_with_argmax_infer.tgz) \| [FCN-HRNet-W18-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz)(暂时不支持ONNXRuntime的GPU推理) | 37MB | 1024x512 | 78.97% | 79.49% | 79.74% | -| [Deeplabv3-ResNet101-OS8-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer.tgz) \| [Deeplabv3-ResNet101-OS8-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) | 150MB | 1024x512 | 79.90% | 80.22% | 80.47% | - -## 详细部署文档 - -- [Python部署](python) -- [C++部署](cpp) +## 常见问题 +遇到问题可查看常见问题集合文档或搜索 FastDeploy issues,链接如下。若都无法解决,欢迎给 FastDeploy 提交新的issue +[常见问题集合](https://github.com/PaddlePaddle/FastDeploy/tree/develop/docs/cn/faq) +[FastDeploy issues](https://github.com/PaddlePaddle/FastDeploy/issues) diff --git a/examples/vision/segmentation/paddleseg/a311d/README_CN.md b/examples/vision/segmentation/paddleseg/a311d/README_CN.md deleted file mode 100644 index dad4f3924..000000000 --- a/examples/vision/segmentation/paddleseg/a311d/README_CN.md +++ /dev/null @@ -1,12 +0,0 @@ -[English](README.md) | 简体中文 -# PP-LiteSeg 量化模型在 A311D 上的部署 -目前 FastDeploy 已经支持基于 Paddle Lite 部署 PP-LiteSeg 量化模型到 A311D 上。 - -模型的量化和量化模型的下载请参考:[模型量化](../quantize/README.md) - - -## 详细部署文档 - -在 A311D 上只支持 C++ 的部署。 - -- [C++部署](cpp) diff --git a/examples/vision/segmentation/paddleseg/a311d/README.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md similarity index 78% rename from examples/vision/segmentation/paddleseg/a311d/README.md rename to examples/vision/segmentation/paddleseg/amlogic/a311d/README.md index 07870aa59..db3e18110 100755 --- a/examples/vision/segmentation/paddleseg/a311d/README.md +++ b/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md @@ -1,6 +1,6 @@ English | [简体中文](README_CN.md) -# Deployment of PP-LiteSeg Quantification Model on A311D -Now FastDeploy allows deploying PP-LiteSeg quantization model to A311D based on Paddle Lite. +# Deployment of PP-LiteSeg Quantification Model on A311D +Now FastDeploy allows deploying PP-LiteSeg quantization model to A311D based on Paddle Lite. 
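+
+The snippet below is a minimal C++ sketch of how the A311D NPU is selected at runtime; it mirrors the `UseTimVX()` and `SetLiteSubgraphPartitionPath()` calls used in the `infer.cc` demo under `cpp/`, and the model paths are placeholders.
+
+```c++
+#include "fastdeploy/vision.h"
+
+int main() {
+  // Select the A311D NPU through Paddle Lite's TIM-VX backend and point it at
+  // the heterogeneous-computing subgraph file shipped with the quantized model.
+  fastdeploy::RuntimeOption option;
+  option.UseTimVX();
+  option.SetLiteSubgraphPartitionPath("./ppliteseg/subgraph.txt");
+
+  auto model = fastdeploy::vision::segmentation::PaddleSegModel(
+      "./ppliteseg/model.pdmodel", "./ppliteseg/model.pdiparams",
+      "./ppliteseg/deploy.yaml", option);
+  return model.Initialized() ? 0 : -1;
+}
+```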
For model quantization and download of quantized models, refer to [Model Quantization](../quantize/README.md) diff --git a/examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md new file mode 100644 index 000000000..ccb999450 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md @@ -0,0 +1,20 @@ +[English](README.md) | 简体中文 +# 在晶晨A311D上使用FastDeploy部署PaddleSeg模型 +晶晨A311D是一款先进的AI应用处理器。目前,FastDeploy支持在A311D上基于Paddle-Lite部署PaddleSeg相关模型 + +## 晶晨A311D支持的PaddleSeg模型 +由于晶晨A311D的NPU仅支持INT8量化模型的部署,因此所支持的量化模型如下: +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) + +为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型,开发者可直接下载使用。 + +| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | +| [PP-LiteSeg-T(STDC1)-cityscapes-without-argmax](https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz)| 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | +>> **注意**: FastDeploy模型量化的方法及一键自动化压缩工具可以参考[模型量化](../../../quantize/README.md) + +## 详细部署文档 + +目前,A311D上只支持C++的部署。 + +- [C++部署](cpp) diff --git a/examples/vision/segmentation/paddleseg/a311d/cpp/CMakeLists.txt b/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/CMakeLists.txt similarity index 100% rename from examples/vision/segmentation/paddleseg/a311d/cpp/CMakeLists.txt rename to examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/CMakeLists.txt diff --git a/examples/vision/segmentation/paddleseg/a311d/cpp/README.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README.md similarity index 100% rename from examples/vision/segmentation/paddleseg/a311d/cpp/README.md rename to examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README.md diff --git a/examples/vision/segmentation/paddleseg/a311d/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README_CN.md similarity index 70% rename from examples/vision/segmentation/paddleseg/a311d/cpp/README_CN.md rename to examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README_CN.md index a9528e940..4780a9110 100644 --- a/examples/vision/segmentation/paddleseg/a311d/cpp/README_CN.md +++ b/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README_CN.md @@ -1,31 +1,31 @@ [English](README.md) | 简体中文 # PP-LiteSeg 量化模型 C++ 部署示例 -本目录下提供的 `infer.cc`,可以帮助用户快速完成 PP-LiteSeg 量化模型在 A311D 上的部署推理加速。 +本目录下提供的 `infer.cc`,可以帮助用户快速完成 PP-LiteSeg 量化模型在 晶晨A311D 上的部署推理加速。 ## 部署准备 ### FastDeploy 交叉编译环境准备 -1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/a311d.md#交叉编译环境搭建) +1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/a311d.md#交叉编译环境搭建) ### 模型准备 1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 2. 用户可以使用 FastDeploy 提供的一键模型自动化压缩工具,自行进行模型量化, 并使用产出的量化模型进行部署.(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的 deploy.yaml 文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可.) -3. 模型需要异构计算,异构计算文件可以参考:[异构计算](./../../../../../../docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 +3. 
模型需要异构计算,异构计算文件可以参考:[异构计算](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 -更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) +更多量化相关相关信息可查阅[模型量化](../../../quantize/README.md) ## 在 A311D 上部署量化后的 PP-LiteSeg 分割模型 请按照以下步骤完成在 A311D 上部署 PP-LiteSeg 量化模型: -1. 交叉编译编译 FastDeploy 库,具体请参考:[交叉编译 FastDeploy](../../../../../../docs/cn/build_and_install/a311d.md#基于-paddle-lite-的-fastdeploy-交叉编译库编译) +1. 交叉编译编译 FastDeploy 库,具体请参考:[交叉编译 FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/a311d.md#基于-paddle-lite-的-fastdeploy-交叉编译库编译) 2. 将编译后的库拷贝到当前目录,可使用如下命令: ```bash -cp -r FastDeploy/build/fastdeploy-timvx/ FastDeploy/examples/vision/segmentation/paddleseg/a311d/cpp +cp -r FastDeploy/build/fastdeploy-timvx/ FastDeploy/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp ``` 3. 在当前路径下载部署所需的模型和示例图片: ```bash -cd FastDeploy/examples/vision/segmentation/paddleseg/a311d/cpp +cd FastDeploy/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp mkdir models && mkdir images wget https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz tar -xvf ppliteseg.tar.gz @@ -36,7 +36,7 @@ cp -r cityscapes_demo.png images 4. 编译部署示例,可使入如下命令: ```bash -cd FastDeploy/examples/vision/segmentation/paddleseg/a311d/cpp +cd FastDeploy/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp mkdir build && cd build cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/../fastdeploy-timvx/toolchain.cmake -DFASTDEPLOY_INSTALL_DIR=${PWD}/../fastdeploy-timvx -DTARGET_ABI=arm64 .. make -j8 @@ -47,7 +47,7 @@ make install 5. 基于 adb 工具部署 PP-LiteSeg 分割模型到晶晨 A311D,可使用如下命令: ```bash # 进入 install 目录 -cd FastDeploy/examples/vision/segmentation/paddleseg/a311d/cpp/build/install/ +cd FastDeploy/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/build/install/ # 如下命令表示:bash run_with_adb.sh 需要运行的demo 模型路径 图片路径 设备的DEVICE_ID bash run_with_adb.sh infer_demo ppliteseg cityscapes_demo.png $DEVICE_ID ``` @@ -56,4 +56,4 @@ bash run_with_adb.sh infer_demo ppliteseg cityscapes_demo.png $DEVICE_ID -需要特别注意的是,在 A311D 上部署的模型需要是量化后的模型,模型的量化请参考:[模型量化](../../../../../../docs/cn/quantize.md) +需要特别注意的是,在 A311D 上部署的模型需要是量化后的模型,模型的量化请参考:[模型量化](../../../quantize/README.md) diff --git a/examples/vision/segmentation/paddleseg/a311d/cpp/infer.cc b/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/infer.cc old mode 100755 new mode 100644 similarity index 95% rename from examples/vision/segmentation/paddleseg/a311d/cpp/infer.cc rename to examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/infer.cc index 38f5f7168..4d2eb3f29 --- a/examples/vision/segmentation/paddleseg/a311d/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/infer.cc @@ -24,13 +24,13 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file) { auto params_file = model_dir + sep + "model.pdiparams"; auto config_file = model_dir + sep + "deploy.yaml"; auto subgraph_file = model_dir + sep + "subgraph.txt"; - fastdeploy::vision::EnableFlyCV(); + fastdeploy::vision::EnableFlyCV(); fastdeploy::RuntimeOption option; option.UseTimVX(); option.SetLiteSubgraphPartitionPath(subgraph_file); - + auto model = fastdeploy::vision::segmentation::PaddleSegModel( - model_file, params_file, config_file,option); + model_file, params_file, config_file, option); assert(model.Initialized()); diff --git a/examples/vision/segmentation/paddleseg/a311d/cpp/run_with_adb.sh 
b/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/run_with_adb.sh similarity index 100% rename from examples/vision/segmentation/paddleseg/a311d/cpp/run_with_adb.sh rename to examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/run_with_adb.sh diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md b/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md new file mode 100644 index 000000000..a4ff4af5e --- /dev/null +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md @@ -0,0 +1,48 @@ +# 使用FastDeploy部署PaddleSeg模型 + +## 模型版本说明 + +- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) + +目前FastDeploy支持如下模型的部署 + +- [U-Net系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/unet/README.md) +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) +- [PP-HumanSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README.md) +- [FCN系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/fcn/README.md) +- [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) +- [SegFormer系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/segformer/README.md) + +【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../matting/) + +## 准备PaddleSeg部署模型 +PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) + +**注意** +- PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 + +## 下载预训练模型 + +为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型 +- without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` +- with-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op argmax` + +开发者可直接下载使用。 + +| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | +| [Unet-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_with_argmax_infer.tgz) \| [Unet-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | 52MB | 1024x512 | 65.00% | 66.02% | 66.89% | +| [PP-LiteSeg-B(STDC2)-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz) \| [PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz) | 31MB | 1024x512 | 79.04% | 79.52% | 79.85% | +|[PP-HumanSegV1-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV1_Lite_with_argmax_infer.tgz) \| [PP-HumanSegV1-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) | 543KB | 192x192 | 86.2% | - | - | +|[PP-HumanSegV2-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_infer.tgz) | 12MB | 192x192 | 92.52% | - | - | +| [PP-HumanSegV2-Mobile-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Mobile-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) | 29MB | 192x192 | 93.13% | - | - | 
+|[PP-HumanSegV1-Server-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_with_argmax_infer.tgz) \| [PP-HumanSegV1-Server-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) | 103MB | 512x512 | 96.47% | - | - | +| [Portait-PP-HumanSegV2-Lite-with-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer.tgz) \| [Portait-PP-HumanSegV2-Lite-without-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) | 3.6M | 256x144 | 96.63% | - | - | +| [FCN-HRNet-W18-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_with_argmax_infer.tgz) \| [FCN-HRNet-W18-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz)(暂时不支持ONNXRuntime的GPU推理) | 37MB | 1024x512 | 78.97% | 79.49% | 79.74% | +| [Deeplabv3-ResNet101-OS8-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer.tgz) \| [Deeplabv3-ResNet101-OS8-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) | 150MB | 1024x512 | 79.90% | 80.22% | 80.47% | +| [SegFormer_B0-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/SegFormer_B0-cityscapes-with-argmax.tgz) \| [SegFormer_B0-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/SegFormer_B0-cityscapes-without-argmax.tgz) | 15MB | 1024x1024 | 76.73% | 77.16% | - | + +## 详细部署文档 + +- [Python部署](python) +- [C++部署](cpp) diff --git a/examples/vision/segmentation/paddleseg/cpp/CMakeLists.txt b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/CMakeLists.txt similarity index 100% rename from examples/vision/segmentation/paddleseg/cpp/CMakeLists.txt rename to examples/vision/segmentation/paddleseg/cpu-gpu/cpp/CMakeLists.txt diff --git a/examples/vision/segmentation/paddleseg/cpp/README.md b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README.md similarity index 98% rename from examples/vision/segmentation/paddleseg/cpp/README.md rename to examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README.md index 4c5be9f6c..bcccdd1cb 100755 --- a/examples/vision/segmentation/paddleseg/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README.md @@ -1,7 +1,7 @@ English | [简体中文](README_CN.md) # PaddleSeg C++ Deployment Example -This directory provides examples that `infer.cc` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. +This directory provides examples that `infer.cc` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. Before deployment, two steps require confirmation @@ -15,7 +15,7 @@ Taking the inference on Linux as an example, the compilation test can be complet ```bash mkdir build cd build -# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above +# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz tar xvf fastdeploy-linux-x64-x.x.x.tgz cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x @@ -45,7 +45,7 @@ The visualized result after running is as follows The above command works for Linux or MacOS. 
For SDK use-pattern in Windows, refer to: - [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md) -## PaddleSeg C++ Interface +## PaddleSeg C++ Interface ### PaddleSeg Class @@ -62,7 +62,7 @@ PaddleSegModel model loading and initialization, among which model_file is the e **Parameter** -> * **model_file**(str): Model file path +> * **model_file**(str): Model file path > * **params_file**(str): Parameter file path > * **config_file**(str): Inference deployment configuration file > * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md new file mode 100644 index 000000000..c5d39934b --- /dev/null +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md @@ -0,0 +1,106 @@ +[English](README.md) | 简体中文 +# PaddleSeg C++部署示例 + +本目录下提供`infer.cc`快速完成PP-LiteSeg在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) + +【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../matting) + +以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.0以上(x.x.x>=1.0.0) + +```bash +#下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/cpp-gpu/cpp + +mkdir build +cd build +# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载PP-LiteSeg模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + + +# CPU推理 +./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png 0 +# GPU推理 +./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png 1 +# GPU上TensorRT推理 +./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png 2 +``` + +运行完成可视化结果如下图所示 +
+（运行结果可视化示例图）
+ +> **注意:** +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_sdk_on_windows.md) + +## PaddleSeg C++接口 + +### PaddleSeg类 + +```c++ +fastdeploy::vision::segmentation::PaddleSegModel( + const string& model_file, + const string& params_file = "", + const string& config_file, + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) +``` + +PaddleSegModel模型加载和初始化,其中model_file为导出的Paddle模型格式。 + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **config_file**(str): 推理部署配置文件 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + +#### Predict函数 + +> ```c++ +> PaddleSegModel::Predict(const cv::Mat &im, SegmentationResult *result) +> ``` +> +> 模型预测接口,输入图像直接输出检测结果。 +> +> **参数** +> +> > * **im**: 输入图像,注意需为HWC,BGR格式 +> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, SegmentationResult说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) + +### 类成员属性 +#### 预处理参数 +用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 + +> > * **is_vertical_screen**(bool): PP-HumanSeg系列模型通过设置此参数为`true`表明输入图片是竖屏,即height大于width的图片 + +#### 后处理参数 +> > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 + +## 快速链接 +- [PaddleSeg模型介绍](../../) +- [Python部署](../python) + +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) +- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) +- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md) +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md) +- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md) diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc new file mode 100644 index 000000000..5269a0c2e --- /dev/null +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc @@ -0,0 +1,131 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
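+
+// NOTE: this demo builds a single binary with three inference paths for
+// PaddleSeg models: CpuInfer (default CPU backend), GpuInfer (CUDA GPU) and
+// TrtInfer (GPU with the TensorRT backend enabled via UseTrtBackend()). The
+// path is chosen by the third command-line argument (0/1/2); see main() below.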
+ +#include "fastdeploy/vision.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void CpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "deploy.yaml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto model = fastdeploy::vision::segmentation::PaddleSegModel( + model_file, params_file, config_file, option); + + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::SegmentationResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void GpuInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "deploy.yaml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + auto model = fastdeploy::vision::segmentation::PaddleSegModel( + model_file, params_file, config_file, option); + + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::SegmentationResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +void TrtInfer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "deploy.yaml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + option.UseTrtBackend(); + auto model = fastdeploy::vision::segmentation::PaddleSegModel( + model_file, params_file, config_file, option); + + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + + fastdeploy::vision::SegmentationResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./ppseg_model_dir ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend; 3: run " + "with kunlunxin." 
+ << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + CpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + GpuInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 2) { + TrtInfer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/segmentation/paddleseg/python/README.md b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md similarity index 97% rename from examples/vision/segmentation/paddleseg/python/README.md rename to examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md index 95885a2f2..add5b053d 100755 --- a/examples/vision/segmentation/paddleseg/python/README.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md @@ -10,7 +10,7 @@ Before deployment, two steps require confirmation This directory provides examples that `infer.py` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. The script is as follows ```bash -# Download the deployment example code +# Download the deployment example code git clone https://github.com/PaddlePaddle/FastDeploy.git cd FastDeploy/examples/vision/segmentation/paddleseg/python @@ -34,7 +34,7 @@ The visualized result after running is as follows -## PaddleSegModel Python Interface +## PaddleSegModel Python Interface ```python fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) @@ -44,7 +44,7 @@ PaddleSeg model loading and initialization, among which model_file, params_file, **Parameter** -> * **model_file**(str): Model file path +> * **model_file**(str): Model file path > * **params_file**(str): Parameter file path > * **config_file**(str): Inference deployment configuration file > * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md new file mode 100644 index 000000000..1e31bd014 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md @@ -0,0 +1,88 @@ +[English](README.md) | 简体中文 +# PaddleSeg Python部署示例 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 
FastDeploy Python whl包安装,参考[FastDeploy Python安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) + +【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../matting) + +本目录下提供`infer.py`快速完成PP-LiteSeg在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成 + +```bash +#下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/cpu-gpu/python + +# 下载Unet模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +# CPU推理 +python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png --device cpu +# GPU推理 +python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu +# GPU上使用TensorRT推理 (注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待) +python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu --use_trt True +``` + +运行完成可视化结果如下图所示 +
+（运行结果可视化示例图）
+ +## PaddleSegModel Python接口 + +```python +fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +``` + +PaddleSeg模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/docs/model_export_cn.md) + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **config_file**(str): 推理部署配置文件 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + +### predict函数 + +> ```python +> PaddleSegModel.predict(input_image) +> ``` +> +> 模型预测结口,输入图像直接输出检测结果。 +> +> **参数** +> +> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 + +> **返回** +> +> > 返回`fastdeploy.vision.SegmentationResult`结构体,结构体说明参考文档[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) + +### 类成员属性 +#### 预处理参数 +用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 + +> > * **is_vertical_screen**(bool): PP-HumanSeg系列模型通过设置此参数为`true`表明输入图片是竖屏,即height大于width的图片 + +#### 后处理参数 +> > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 + +## 其它文档 + +- [PaddleSeg 模型介绍](..) +- [PaddleSeg C++部署](../cpp) + +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) +- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) +- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md) +- [PaddleSeg python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/semantic_segmentation.html) +- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md) +- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md) diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/python/infer.py b/examples/vision/segmentation/paddleseg/cpu-gpu/python/infer.py new file mode 100755 index 000000000..ba961159f --- /dev/null +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/python/infer.py @@ -0,0 +1,57 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", required=True, help="Path of PaddleSeg model.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + + if args.device.lower() == "gpu": + option.use_gpu() + + if args.use_trt: + option.use_trt_backend() + option.set_trt_input_shape("x", [1, 3, 256, 256], [1, 3, 1024, 1024], + [1, 3, 2048, 2048]) + return option + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model_file = os.path.join(args.model, "model.pdmodel") +params_file = os.path.join(args.model, "model.pdiparams") +config_file = os.path.join(args.model, "deploy.yaml") +model = fd.vision.segmentation.PaddleSegModel( + model_file, 
params_file, config_file, runtime_option=runtime_option) + +# 预测图片分割结果 +im = cv2.imread(args.image) +result = model.predict(im) +print(result) + +# 可视化结果 +vis_im = fd.vision.vis_segmentation(im, result, weight=0.5) +cv2.imwrite("vis_img.png", vis_im) diff --git a/examples/vision/segmentation/paddleseg/kunlun/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/README_CN.md new file mode 100644 index 000000000..a4ff4af5e --- /dev/null +++ b/examples/vision/segmentation/paddleseg/kunlun/README_CN.md @@ -0,0 +1,48 @@ +# 使用FastDeploy部署PaddleSeg模型 + +## 模型版本说明 + +- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) + +目前FastDeploy支持如下模型的部署 + +- [U-Net系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/unet/README.md) +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) +- [PP-HumanSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README.md) +- [FCN系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/fcn/README.md) +- [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) +- [SegFormer系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/segformer/README.md) + +【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../matting/) + +## 准备PaddleSeg部署模型 +PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) + +**注意** +- PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 + +## 下载预训练模型 + +为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型 +- without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` +- with-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op argmax` + +开发者可直接下载使用。 + +| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | +| [Unet-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_with_argmax_infer.tgz) \| [Unet-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | 52MB | 1024x512 | 65.00% | 66.02% | 66.89% | +| [PP-LiteSeg-B(STDC2)-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz) \| [PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz) | 31MB | 1024x512 | 79.04% | 79.52% | 79.85% | +|[PP-HumanSegV1-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV1_Lite_with_argmax_infer.tgz) \| [PP-HumanSegV1-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) | 543KB | 192x192 | 86.2% | - | - | +|[PP-HumanSegV2-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_infer.tgz) | 12MB | 192x192 | 92.52% | - | - | +| [PP-HumanSegV2-Mobile-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Mobile-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) | 29MB | 192x192 | 93.13% | - | - | 
+|[PP-HumanSegV1-Server-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_with_argmax_infer.tgz) \| [PP-HumanSegV1-Server-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) | 103MB | 512x512 | 96.47% | - | - | +| [Portait-PP-HumanSegV2-Lite-with-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer.tgz) \| [Portait-PP-HumanSegV2-Lite-without-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) | 3.6M | 256x144 | 96.63% | - | - | +| [FCN-HRNet-W18-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_with_argmax_infer.tgz) \| [FCN-HRNet-W18-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz)(暂时不支持ONNXRuntime的GPU推理) | 37MB | 1024x512 | 78.97% | 79.49% | 79.74% | +| [Deeplabv3-ResNet101-OS8-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer.tgz) \| [Deeplabv3-ResNet101-OS8-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) | 150MB | 1024x512 | 79.90% | 80.22% | 80.47% | +| [SegFormer_B0-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/SegFormer_B0-cityscapes-with-argmax.tgz) \| [SegFormer_B0-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/SegFormer_B0-cityscapes-without-argmax.tgz) | 15MB | 1024x1024 | 76.73% | 77.16% | - | + +## 详细部署文档 + +- [Python部署](python) +- [C++部署](cpp) diff --git a/examples/vision/segmentation/paddleseg/kunlun/cpp/CMakeLists.txt b/examples/vision/segmentation/paddleseg/kunlun/cpp/CMakeLists.txt new file mode 100644 index 000000000..93540a7e8 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/kunlun/cpp/CMakeLists.txt @@ -0,0 +1,14 @@ +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) +# 添加FastDeploy库依赖 +target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/segmentation/paddleseg/kunlun/cpp/README.md b/examples/vision/segmentation/paddleseg/kunlun/cpp/README.md new file mode 100755 index 000000000..bcccdd1cb --- /dev/null +++ b/examples/vision/segmentation/paddleseg/kunlun/cpp/README.md @@ -0,0 +1,96 @@ +English | [简体中文](README_CN.md) +# PaddleSeg C++ Deployment Example + +This directory provides examples that `infer.cc` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. + +Before deployment, two steps require confirmation + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. Download the precompiled deployment library and samples code according to your development environment. 
Refer to [FastDeploy Precompiled Library](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) + +Taking the inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 1.0.0 or above (x.x.x>=1.0.0) is required to support this model. + +```bash +mkdir build +cd build +# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# Download Unet model files and test images +wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz +tar -xvf Unet_cityscapes_without_argmax_infer.tgz +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + + +# CPU inference +./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 0 +# GPU inference +./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 1 +# TensorRT inference on GPU +./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 2 +# kunlunxin XPU inference +./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 3 +``` + +The visualized result after running is as follows +
+ +
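+
+If you prefer to embed the model in your own C++ program rather than drive the demo through its integer `run_option` flag, the snippet below is a minimal sketch of the same flow as `infer.cc`. It is not part of the demo: it assumes the `RuntimeOption::UseKunlunXin()` API for selecting the KunlunXin XPU backend and uses placeholder paths, so adapt both to your environment.
+
+```c++
+// Minimal sketch: load an exported PaddleSeg model and run one prediction,
+// mirroring the flow of the accompanying infer.cc.
+#include <iostream>
+
+#include "fastdeploy/vision.h"
+
+int main() {
+  fastdeploy::RuntimeOption option;
+  option.UseKunlunXin();  // assumption: selects the KunlunXin XPU backend; omit to run on CPU
+
+  auto model = fastdeploy::vision::segmentation::PaddleSegModel(
+      "Unet_cityscapes_without_argmax_infer/model.pdmodel",
+      "Unet_cityscapes_without_argmax_infer/model.pdiparams",
+      "Unet_cityscapes_without_argmax_infer/deploy.yaml", option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize the model." << std::endl;
+    return -1;
+  }
+
+  auto im = cv::imread("cityscapes_demo.png");
+  fastdeploy::vision::SegmentationResult res;
+  if (!model.Predict(im, &res)) {  // same call pattern as the example code
+    std::cerr << "Failed to predict." << std::endl;
+    return -1;
+  }
+  std::cout << res.Str() << std::endl;  // print a summary of label_map / score_map
+  return 0;
+}
+```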
+
+The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to:
+- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)
+
+## PaddleSeg C++ Interface
+
+### PaddleSeg Class
+
+```c++
+fastdeploy::vision::segmentation::PaddleSegModel(
+        const string& model_file,
+        const string& params_file = "",
+        const string& config_file,
+        const RuntimeOption& runtime_option = RuntimeOption(),
+        const ModelFormat& model_format = ModelFormat::PADDLE)
+```
+
+PaddleSegModel model loading and initialization, among which model_file is the model file exported in the Paddle format.
+
+**Parameter**
+
+> * **model_file**(str): Model file path
+> * **params_file**(str): Parameter file path
+> * **config_file**(str): Inference deployment configuration file
+> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, i.e. the default configuration is used
+> * **model_format**(ModelFormat): Model format. Paddle format by default
+
+#### Predict Function
+
+> ```c++
+> PaddleSegModel::Predict(cv::Mat* im, SegmentationResult* result)
+> ```
+>
+> Model prediction interface. It takes an input image and outputs the segmentation result.
+>
+> **Parameter**
+>
+> > * **im**: Input image in HWC layout, BGR format
+> > * **result**: The segmentation result, including the predicted segmentation labels and the probability corresponding to each label. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of SegmentationResult
+
+### Class Member Variable
+#### Pre-processing Parameter
+Users can modify the following pre-processing parameters according to their needs, which affects the final inference and deployment results
+
+> > * **is_vertical_screen**(bool): For PP-HumanSeg models, setting this parameter to `true` indicates that the input image is in portrait orientation, i.e. its height is greater than its width
+
+#### Post-processing Parameter
+> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported.
Set this parameter to `true` to normalize the probability result (score_map) of the predicted output segmentation label (label_map) + +- [Model Description](../../) +- [Python Deployment](../python) +- [Vision Model Prediction Results](../../../../../docs/api/vision_results/) +- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/paddleseg/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/cpp/README_CN.md similarity index 100% rename from examples/vision/segmentation/paddleseg/cpp/README_CN.md rename to examples/vision/segmentation/paddleseg/kunlun/cpp/README_CN.md diff --git a/examples/vision/segmentation/paddleseg/cpp/infer.cc b/examples/vision/segmentation/paddleseg/kunlun/cpp/infer.cc old mode 100755 new mode 100644 similarity index 97% rename from examples/vision/segmentation/paddleseg/cpp/infer.cc rename to examples/vision/segmentation/paddleseg/kunlun/cpp/infer.cc index ae97c0406..e4d6e39d9 --- a/examples/vision/segmentation/paddleseg/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/kunlun/cpp/infer.cc @@ -48,7 +48,8 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) { std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } -void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) { +void KunlunXinInfer(const std::string& model_dir, + const std::string& image_file) { auto model_file = model_dir + sep + "model.pdmodel"; auto params_file = model_dir + sep + "model.pdiparams"; auto config_file = model_dir + sep + "deploy.yaml"; @@ -170,7 +171,8 @@ int main(int argc, char* argv[]) { "e.g ./infer_model ./ppseg_model_dir ./test.jpeg 0" << std::endl; std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " - "with gpu; 2: run with gpu and use tensorrt backend; 3: run with kunlunxin." + "with gpu; 2: run with gpu and use tensorrt backend; 3: run " + "with kunlunxin." << std::endl; return -1; } diff --git a/examples/vision/segmentation/paddleseg/kunlun/python/README.md b/examples/vision/segmentation/paddleseg/kunlun/python/README.md new file mode 100755 index 000000000..add5b053d --- /dev/null +++ b/examples/vision/segmentation/paddleseg/kunlun/python/README.md @@ -0,0 +1,82 @@ +English | [简体中文](README_CN.md) +# PaddleSeg Python Deployment Example + +Before deployment, two steps require confirmation + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) + +This directory provides examples that `infer.py` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. 
The script is as follows +```bash +# Download the deployment example code +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/python + +# Download Unet model files and test images +wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz +tar -xvf Unet_cityscapes_without_argmax_infer.tgz +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +# CPU inference +python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device cpu +# GPU inference +python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu +# TensorRT inference on GPU(Attention: It is somewhat time-consuming for the operation of model serialization when running TensorRT inference for the first time. Please be patient.) +python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu --use_trt True +# kunlunxin XPU inference +python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device kunlunxin +``` + +The visualized result after running is as follows +
+ +
+
+## PaddleSegModel Python Interface
+
+```python
+fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+```
+
+PaddleSeg model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/docs/model_export_cn.md) for more information
+
+**Parameter**
+
+> * **model_file**(str): Model file path
+> * **params_file**(str): Parameter file path
+> * **config_file**(str): Inference deployment configuration file
+> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, i.e. the default configuration is used
+> * **model_format**(ModelFormat): Model format. Paddle format by default
+
+### predict function
+
+> ```python
+> PaddleSegModel.predict(input_image)
+> ```
+>
+> Model prediction interface. It takes an input image and outputs the segmentation result.
+>
+> **Parameter**
+>
+> > * **input_image**(np.ndarray): Input data in HWC layout, BGR format
+
+> **Return**
+>
+> > Return `fastdeploy.vision.SegmentationResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of the structure.
+
+### Class Member Variable
+#### Pre-processing Parameter
+Users can modify the following pre-processing parameters according to their needs, which affects the final inference and deployment results
+
+> > * **is_vertical_screen**(bool): For PP-HumanSeg models, setting this parameter to `true` indicates that the input image is in portrait orientation, i.e. its height is greater than its width
+
+#### Post-processing Parameter
+> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported. Set this parameter to `true` to apply softmax normalization to the probability result (score_map) that corresponds to the predicted segmentation labels (label_map)
+
+## Other Documents
+
+- [PaddleSeg Model Description](..)
+- [PaddleSeg C++ Deployment](../cpp) +- [Model Prediction Results](../../../../../docs/api/vision_results/) +- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/paddleseg/python/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/python/README_CN.md similarity index 100% rename from examples/vision/segmentation/paddleseg/python/README_CN.md rename to examples/vision/segmentation/paddleseg/kunlun/python/README_CN.md diff --git a/examples/vision/segmentation/paddleseg/python/infer.py b/examples/vision/segmentation/paddleseg/kunlun/python/infer.py similarity index 100% rename from examples/vision/segmentation/paddleseg/python/infer.py rename to examples/vision/segmentation/paddleseg/kunlun/python/infer.py diff --git a/examples/vision/segmentation/paddleseg/python/serving/README.md b/examples/vision/segmentation/paddleseg/kunlun/python/serving/README.md similarity index 100% rename from examples/vision/segmentation/paddleseg/python/serving/README.md rename to examples/vision/segmentation/paddleseg/kunlun/python/serving/README.md diff --git a/examples/vision/segmentation/paddleseg/python/serving/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/python/serving/README_CN.md similarity index 100% rename from examples/vision/segmentation/paddleseg/python/serving/README_CN.md rename to examples/vision/segmentation/paddleseg/kunlun/python/serving/README_CN.md diff --git a/examples/vision/segmentation/paddleseg/python/serving/client.py b/examples/vision/segmentation/paddleseg/kunlun/python/serving/client.py similarity index 100% rename from examples/vision/segmentation/paddleseg/python/serving/client.py rename to examples/vision/segmentation/paddleseg/kunlun/python/serving/client.py diff --git a/examples/vision/segmentation/paddleseg/python/serving/server.py b/examples/vision/segmentation/paddleseg/kunlun/python/serving/server.py similarity index 100% rename from examples/vision/segmentation/paddleseg/python/serving/server.py rename to examples/vision/segmentation/paddleseg/kunlun/python/serving/server.py diff --git a/examples/vision/segmentation/paddleseg/quantize/README_CN.md b/examples/vision/segmentation/paddleseg/quantize/README_CN.md index a35f1d99d..a9b287754 100644 --- a/examples/vision/segmentation/paddleseg/quantize/README_CN.md +++ b/examples/vision/segmentation/paddleseg/quantize/README_CN.md @@ -5,33 +5,22 @@ FastDeploy已支持部署量化模型,并提供一键模型自动化压缩的工 ## FastDeploy一键模型自动化压缩工具 FastDeploy 提供了一键模型自动化压缩工具, 能够简单地通过输入一个配置文件, 对模型进行量化. -详细教程请见: [一键模型自动化压缩工具](../../../../../tools/common_tools/auto_compression/) -注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的deploy.yaml文件, 自行量化的模型文件夹内不包含此yaml文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。 +详细教程请见: [一键模型自动化压缩工具](https://github.com/PaddlePaddle/FastDeploy/tree/develop/tools/common_tools/auto_compression) +>> **注意**: 推理量化后的分类模型仍然需要FP32模型文件夹下的deploy.yaml文件, 自行量化的模型文件夹内不包含此yaml文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。 -## 下载量化完成的PaddleSeg模型 +## 量化完成的PaddleSeg模型 用户也可以直接下载下表中的量化模型进行部署.(点击模型名字即可下载) -Benchmark表格说明: -- Runtime时延为模型在各种Runtime上的推理时延,包含CPU->GPU数据拷贝,GPU推理,GPU->CPU数据拷贝时间. 不包含模型各自的前后处理时间. -- 端到端时延为模型在实际推理场景中的时延, 包含模型的前后处理. -- 所测时延均为推理1000次后求得的平均值, 单位是毫秒. -- INT8 + FP16 为在推理INT8量化模型的同时, 给Runtime 开启FP16推理选项 -- INT8 + FP16 + PM, 为在推理INT8量化模型和开启FP16的同时, 开启使用Pinned Memory的选项,可加速GPU->CPU数据拷贝的速度 -- 最大加速比, 为FP32时延除以INT8推理的最快时延,得到最大加速比. -- 策略为量化蒸馏训练时, 采用少量无标签数据集训练得到量化模型, 并在全量验证集上验证精度, INT8精度并不代表最高的INT8精度. -- CPU为Intel(R) Xeon(R) Gold 6271C, 所有测试中固定CPU线程数为1. 
GPU为Tesla T4, TensorRT版本8.4.15. +| 模型 | 量化方式 | +| [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_QAT_new.tar) |量化蒸馏训练 | -#### Runtime Benchmark -| 模型 |推理后端 |部署硬件 | FP32 Runtime时延 | INT8 Runtime时延 | INT8 + FP16 Runtime时延 | INT8+FP16+PM Runtime时延 | 最大加速比 | FP32 mIoU | INT8 mIoU | 量化方式 | -| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- | -| [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_QAT_new.tar) | Paddle Inference | CPU | 1138.04| 602.62 |None|None | 1.89 |77.37 | 71.62 |量化蒸馏训练 | +量化后模型的Benchmark比较,请参考[量化模型 Benchmark](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/quantize.md) -#### 端到端 Benchmark -| 模型 |推理后端 |部署硬件 | FP32 End2End时延 | INT8 End2End时延 | INT8 + FP16 End2End时延 | INT8+FP16+PM End2End时延 | 最大加速比 | FP32 mIoU | INT8 mIoU | 量化方式 | -| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- | -| [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_QAT_new.tar) | Paddle Inference | CPU | 4726.65| 4134.91|None|None | 1.14 |77.37 | 71.62 |量化蒸馏训练 | - -## 详细部署文档 - -- [Python部署](python) -- [C++部署](cpp) +## 支持部署量化模型的硬件 +FastDeploy 量化模型部署的过程大致都与FP32模型类似,只是模型量化与非量化的区别,如果硬件在量化模型部署过程有特殊处理,也会在文档中特别标明,因此量化模型部署可以参考如下硬件的链接 +- [NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU](../cpu-gpu) +- [昆仑](../kunlun) +- [升腾](../ascend) +- [瑞芯微](../rockchip) +- [晶晨](../amlogic) +- [算能](../sophgo) diff --git a/examples/vision/segmentation/paddleseg/rknpu2/README.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md similarity index 100% rename from examples/vision/segmentation/paddleseg/rknpu2/README.md rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md diff --git a/examples/vision/segmentation/paddleseg/rknpu2/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README_CN.md similarity index 100% rename from examples/vision/segmentation/paddleseg/rknpu2/README_CN.md rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/README_CN.md diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/CMakeLists.txt b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/CMakeLists.txt similarity index 96% rename from examples/vision/segmentation/paddleseg/rknpu2/cpp/CMakeLists.txt rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/CMakeLists.txt index 28161a83d..7fc1f6e22 100644 --- a/examples/vision/segmentation/paddleseg/rknpu2/cpp/CMakeLists.txt +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/CMakeLists.txt @@ -33,4 +33,4 @@ file(GLOB PADDLETOONNX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddle2 install(PROGRAMS ${PADDLETOONNX_LIBS} DESTINATION lib) file(GLOB RKNPU2_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/*) -install(PROGRAMS ${RKNPU2_LIBS} DESTINATION lib) \ No newline at end of file +install(PROGRAMS ${RKNPU2_LIBS} DESTINATION lib) diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/README.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md similarity index 99% rename from examples/vision/segmentation/paddleseg/rknpu2/cpp/README.md rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md index 48c4646e2..1fe268f81 100644 --- 
a/examples/vision/segmentation/paddleseg/rknpu2/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md @@ -40,7 +40,7 @@ mkdir thirdpartys Please refer to [How to Build RKNPU2 Deployment Environment](../../../../../../docs/en/build_and_install/rknpu2.md) to compile SDK.After compiling, the fastdeploy-0.0.3 directory will be created in the build directory, please move it to the thirdpartys directory. ### Copy model and configuration files to folder Model -In the process of Paddle dynamic map model -> Paddle static map model -> ONNX mdoel, ONNX file and the corresponding yaml configuration file will be generated. Please move the configuration file to the folder model. +In the process of Paddle dynamic map model -> Paddle static map model -> ONNX mdoel, ONNX file and the corresponding yaml configuration file will be generated. Please move the configuration file to the folder model. After converting to RKNN, the model file also needs to be copied to folder model. Run the following command to download and use (the model file is RK3588. RK3568 needs to be [reconverted to PPSeg RKNN model](../README.md)). ### Prepare Test Images to folder image diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README_CN.md similarity index 100% rename from examples/vision/segmentation/paddleseg/rknpu2/cpp/README_CN.md rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README_CN.md diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/infer.cc similarity index 87% rename from examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/infer.cc index f80d3fc8f..b501fc899 100644 --- a/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/infer.cc @@ -16,7 +16,8 @@ #include "fastdeploy/vision.h" void ONNXInfer(const std::string& model_dir, const std::string& image_file) { - std::string model_file = model_dir + "/Portrait_PP_HumanSegV2_Lite_256x144_infer.onnx"; + std::string model_file = + model_dir + "/Portrait_PP_HumanSegV2_Lite_256x144_infer.onnx"; std::string params_file; std::string config_file = model_dir + "/deploy.yaml"; auto option = fastdeploy::RuntimeOption(); @@ -43,13 +44,12 @@ void ONNXInfer(const std::string& model_dir, const std::string& image_file) { tc.PrintInfo("PPSeg in ONNX"); cv::imwrite("infer_onnx.jpg", vis_im); - std::cout - << "Visualized result saved in ./infer_onnx.jpg" - << std::endl; + std::cout << "Visualized result saved in ./infer_onnx.jpg" << std::endl; } void RKNPU2Infer(const std::string& model_dir, const std::string& image_file) { - std::string model_file = model_dir + "/Portrait_PP_HumanSegV2_Lite_256x144_infer_rk3588.rknn"; + std::string model_file = + model_dir + "/Portrait_PP_HumanSegV2_Lite_256x144_infer_rk3588.rknn"; std::string params_file; std::string config_file = model_dir + "/deploy.yaml"; auto option = fastdeploy::RuntimeOption(); @@ -78,9 +78,7 @@ void RKNPU2Infer(const std::string& model_dir, const std::string& image_file) { tc.PrintInfo("PPSeg in RKNPU2"); cv::imwrite("infer_rknn.jpg", vis_im); - std::cout - << "Visualized result saved in ./infer_rknn.jpg" - << std::endl; + std::cout << "Visualized result saved in ./infer_rknn.jpg" << std::endl; } int main(int argc, char* argv[]) { @@ -93,7 +91,6 @@ int main(int argc, 
char* argv[]) { } RKNPU2Infer(argv[1], argv[2]); -// ONNXInfer(argv[1], argv[2]); + // ONNXInfer(argv[1], argv[2]); return 0; } - diff --git a/examples/vision/segmentation/paddleseg/rknpu2/pp_humanseg.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md similarity index 99% rename from examples/vision/segmentation/paddleseg/rknpu2/pp_humanseg.md rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md index 2b14f6b9d..e0f458eb0 100644 --- a/examples/vision/segmentation/paddleseg/rknpu2/pp_humanseg.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md @@ -78,4 +78,4 @@ Deploy: - 144 type: Resize - type: Normalize -``` \ No newline at end of file +``` diff --git a/examples/vision/segmentation/paddleseg/rknpu2/pp_humanseg_EN.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg_EN.md similarity index 99% rename from examples/vision/segmentation/paddleseg/rknpu2/pp_humanseg_EN.md rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg_EN.md index 6870d32c7..312b9f7ae 100644 --- a/examples/vision/segmentation/paddleseg/rknpu2/pp_humanseg_EN.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg_EN.md @@ -78,4 +78,4 @@ Deploy: - 144 type: Resize - type: Normalize -``` \ No newline at end of file +``` diff --git a/examples/vision/segmentation/paddleseg/rknpu2/python/README.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README.md similarity index 100% rename from examples/vision/segmentation/paddleseg/rknpu2/python/README.md rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README.md diff --git a/examples/vision/segmentation/paddleseg/rknpu2/python/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README_CN.md similarity index 100% rename from examples/vision/segmentation/paddleseg/rknpu2/python/README_CN.md rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README_CN.md diff --git a/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/infer.py similarity index 100% rename from examples/vision/segmentation/paddleseg/rknpu2/python/infer.py rename to examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/infer.py diff --git a/examples/vision/segmentation/paddleseg/rv1126/README.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md similarity index 92% rename from examples/vision/segmentation/paddleseg/rv1126/README.md rename to examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md index dc9755272..7e4620b18 100755 --- a/examples/vision/segmentation/paddleseg/rv1126/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md @@ -1,6 +1,6 @@ English | [简体中文](README_CN.md) # Deployment of PP-LiteSeg Quantification Model on RV1126 -Now FastDeploy allows deploying PP-LiteSeg quantization model to RV1126 based on Paddle Lite. +Now FastDeploy allows deploying PP-LiteSeg quantization model to RV1126 based on Paddle Lite. 
For model quantization and download of quantized models, refer to [Model Quantization](../quantize/README.md) diff --git a/examples/vision/segmentation/paddleseg/rv1126/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README_CN.md similarity index 100% rename from examples/vision/segmentation/paddleseg/rv1126/README_CN.md rename to examples/vision/segmentation/paddleseg/rockchip/rv1126/README_CN.md diff --git a/examples/vision/segmentation/paddleseg/rv1126/cpp/CMakeLists.txt b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/CMakeLists.txt similarity index 100% rename from examples/vision/segmentation/paddleseg/rv1126/cpp/CMakeLists.txt rename to examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/CMakeLists.txt diff --git a/examples/vision/segmentation/paddleseg/rv1126/cpp/README.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README.md similarity index 100% rename from examples/vision/segmentation/paddleseg/rv1126/cpp/README.md rename to examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README.md diff --git a/examples/vision/segmentation/paddleseg/rv1126/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README_CN.md similarity index 100% rename from examples/vision/segmentation/paddleseg/rv1126/cpp/README_CN.md rename to examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README_CN.md diff --git a/examples/vision/segmentation/paddleseg/rv1126/cpp/infer.cc b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/infer.cc old mode 100755 new mode 100644 similarity index 95% rename from examples/vision/segmentation/paddleseg/rv1126/cpp/infer.cc rename to examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/infer.cc index d9f7c3311..513a03a01 --- a/examples/vision/segmentation/paddleseg/rv1126/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/infer.cc @@ -24,13 +24,13 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file) { auto params_file = model_dir + sep + "model.pdiparams"; auto config_file = model_dir + sep + "deploy.yaml"; auto subgraph_file = model_dir + sep + "subgraph.txt"; - fastdeploy::vision::EnableFlyCV(); + fastdeploy::vision::EnableFlyCV(); fastdeploy::RuntimeOption option; option.UseTimVX(); option.SetLiteSubgraphPartitionPath(subgraph_file); - + auto model = fastdeploy::vision::segmentation::PaddleSegModel( - model_file, params_file, config_file,option); + model_file, params_file, config_file, option); assert(model.Initialized()); diff --git a/examples/vision/segmentation/paddleseg/rv1126/cpp/run_with_adb.sh b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/run_with_adb.sh similarity index 100% rename from examples/vision/segmentation/paddleseg/rv1126/cpp/run_with_adb.sh rename to examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/run_with_adb.sh diff --git a/examples/vision/segmentation/paddleseg/serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/README_CN.md index 676272f2e..0c2e5194b 100644 --- a/examples/vision/segmentation/paddleseg/serving/README_CN.md +++ b/examples/vision/segmentation/paddleseg/serving/README_CN.md @@ -1,68 +1,9 @@ [English](README.md) | 简体中文 -# PaddleSegmentation 服务化部署示例 +# 使用 FastDeploy 服务化部署 PaddleSeg 模型 +## FastDeploy 服务化部署介绍 +在线推理作为企业或个人线上部署模型的最后一环,是工业界必不可少的环节,其中最重要的就是服务化推理框架。FastDeploy 目前提供两种服务化部署方式:simple_serving和fastdeploy_serving。simple_serving 基于Flask框架具有简单高效的特点,可以快速验证线上部署模型的可行性。fastdeploy_serving基于Triton Inference 
Server框架,是一套完备且性能卓越的服务化部署框架,可用于实际生产。 -在服务化部署前,需确认 +## 详细部署文档 -- 1. 服务化镜像的软硬件环境要求和镜像拉取命令请参考[FastDeploy服务化部署](../../../../../serving/README_CN.md) - - -## 启动服务 - -```bash -#下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/serving - -#下载yolov5模型文件 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz -tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz - -# 将模型文件放入 models/runtime/1目录下 -mv PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer/model.pdmodel models/runtime/1/ -mv PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer/model.pdiparams models/runtime/1/ - -# 拉取fastdeploy镜像(x.y.z为镜像版本号,需参照serving文档替换为数字) -# GPU镜像 -docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -# CPU镜像 -docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10 - -# 运行容器.容器名字为 fd_serving, 并挂载当前目录为容器的 /serving 目录 -nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash - -# 启动服务(不设置CUDA_VISIBLE_DEVICES环境变量,会拥有所有GPU卡的调度权限) -CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models --backend-config=python,shm-default-byte-size=10485760 -``` ->> **注意**: 当出现"Address already in use", 请使用`--grpc-port`指定端口号来启动服务,同时更改paddleseg_grpc_client.py中的请求端口号 - -服务启动成功后, 会有以下输出: -``` -...... -I0928 04:51:15.784517 206 grpc_server.cc:4117] Started GRPCInferenceService at 0.0.0.0:8001 -I0928 04:51:15.785177 206 http_server.cc:2815] Started HTTPService at 0.0.0.0:8000 -I0928 04:51:15.826578 206 http_server.cc:167] Started Metrics Service at 0.0.0.0:8002 -``` - - -## 客户端请求 - -在物理机器中执行以下命令,发送grpc请求并输出结果 -``` -#下载测试图片 -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -#安装客户端依赖 -python3 -m pip install tritonclient[all] - -# 发送请求 -python3 paddleseg_grpc_client.py -``` - -发送请求成功后,会返回json格式的检测结果并打印输出: -``` - -``` - -## 配置修改 - -当前默认配置在CPU上运行ONNXRuntime引擎, 如果要在GPU或其他推理引擎上运行。 需要修改`models/runtime/config.pbtxt`中配置,详情请参考[配置文档](../../../../../serving/docs/zh_CN/model_configuration.md) +- [fastdeploy serving](fastdeploy_serving) +- [simple serving](simple_serving) diff --git a/examples/vision/segmentation/paddleseg/serving/models/preprocess/1/deploy.yaml b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer/deploy.yaml similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/models/preprocess/1/deploy.yaml rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer/deploy.yaml diff --git a/examples/vision/segmentation/paddleseg/serving/README.md b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README.md similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/README.md rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README.md diff --git a/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md new file mode 100644 index 000000000..ac8965d75 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md @@ -0,0 +1,86 @@ +[English](README.md) | 简体中文 +# PaddleSeg 服务化部署示例 + +在服务化部署前,需确认 + +- 1. 
服务化镜像的软硬件环境要求和镜像拉取命令请参考[FastDeploy服务化部署](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/README_CN.md) + + +## 启动服务 + +```bash +#下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving + +#下载PP-LiteSeg模型文件 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz + +# 将模型文件放入 models/runtime/1目录下 +mv PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer/model.pdmodel models/runtime/1/ +mv PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer/model.pdiparams models/runtime/1/ + +# 拉取fastdeploy镜像(x.y.z为镜像版本号,需参照serving文档替换为数字) +# GPU镜像 +docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 +# CPU镜像 +docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10 + +# 运行容器.容器名字为 fd_serving, 并挂载当前目录为容器的 /serving 目录 +nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash + +# 启动服务(不设置CUDA_VISIBLE_DEVICES环境变量,会拥有所有GPU卡的调度权限) +CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models --backend-config=python,shm-default-byte-size=10485760 +``` +>> **注意**: 当出现"Address already in use", 请使用`--grpc-port`指定端口号来启动服务,同时更改paddleseg_grpc_client.py中的请求端口号 + +服务启动成功后, 会有以下输出: +``` +...... +I0928 04:51:15.784517 206 grpc_server.cc:4117] Started GRPCInferenceService at 0.0.0.0:8001 +I0928 04:51:15.785177 206 http_server.cc:2815] Started HTTPService at 0.0.0.0:8000 +I0928 04:51:15.826578 206 http_server.cc:167] Started Metrics Service at 0.0.0.0:8002 +``` + + +## 客户端请求 + +在物理机器中执行以下命令,发送grpc请求并输出结果 +``` +#下载测试图片 +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +#安装客户端依赖 +python3 -m pip install tritonclient[all] + +# 发送请求 +python3 paddleseg_grpc_client.py +``` + +发送请求成功后,会返回json格式的检测结果并打印输出: +``` +tm: name: "INPUT" +datatype: "UINT8" +shape: -1 +shape: -1 +shape: -1 +shape: 3 + +output_name: SEG_RESULT +Only print the first 20 labels in label_map of SEG_RESULT +{'label_map': [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2], 'score_map': [], 'shape': [1024, 2048], 'contain_score_map': False} +``` + +## 配置修改 + +当前默认配置在CPU上运行ONNXRuntime引擎, 如果要在GPU或其他推理引擎上运行。 需要修改`models/runtime/config.pbtxt`中配置,详情请参考[配置文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/docs/zh_CN/model_configuration.md) + +## 更多部署方式 +- [使用 VisualDL 进行 Serving 可视化部署](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/docs/zh_CN/vdl_management.md) + +## 常见问题 +- [如何编写客户端 HTTP/GRPC 请求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/docs/zh_CN/client.md) +- [如何编译服务化部署镜像](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/docs/zh_CN/compile.md) +- [服务化部署原理及动态Batch介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/docs/zh_CN/demo.md) +- [模型仓库介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/docs/zh_CN/model_repository.md) diff --git a/examples/vision/segmentation/paddleseg/serving/models/paddleseg/1/README.md b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/paddleseg/1/README.md similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/models/paddleseg/1/README.md rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/paddleseg/1/README.md diff --git 
a/examples/vision/segmentation/paddleseg/serving/models/paddleseg/config.pbtxt b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/paddleseg/config.pbtxt similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/models/paddleseg/config.pbtxt rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/paddleseg/config.pbtxt diff --git a/examples/vision/segmentation/paddleseg/serving/models/postprocess/1/model.py b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/postprocess/1/model.py similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/models/postprocess/1/model.py rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/postprocess/1/model.py diff --git a/examples/vision/segmentation/paddleseg/serving/models/postprocess/config.pbtxt b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/postprocess/config.pbtxt similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/models/postprocess/config.pbtxt rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/postprocess/config.pbtxt diff --git a/examples/vision/segmentation/paddleseg/serving/models/preprocess/1/model.py b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/preprocess/1/model.py similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/models/preprocess/1/model.py rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/preprocess/1/model.py diff --git a/examples/vision/segmentation/paddleseg/serving/models/preprocess/config.pbtxt b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/preprocess/config.pbtxt similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/models/preprocess/config.pbtxt rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/preprocess/config.pbtxt diff --git a/examples/vision/segmentation/paddleseg/serving/models/runtime/1/README.md b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/runtime/1/README.md similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/models/runtime/1/README.md rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/runtime/1/README.md diff --git a/examples/vision/segmentation/paddleseg/serving/models/runtime/config.pbtxt b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/runtime/config.pbtxt similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/models/runtime/config.pbtxt rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/models/runtime/config.pbtxt diff --git a/examples/vision/segmentation/paddleseg/serving/paddleseg_grpc_client.py b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/paddleseg_grpc_client.py similarity index 100% rename from examples/vision/segmentation/paddleseg/serving/paddleseg_grpc_client.py rename to examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/paddleseg_grpc_client.py diff --git a/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md b/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md new file mode 100644 index 000000000..da41a3a00 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md @@ -0,0 +1,36 @@ +English | [简体中文](README_CN.md) + +# 
PaddleSegmentation Python Simple Serving Demo + + +## Environment + +- 1. Prepare environment and install FastDeploy Python whl, refer to [download_prebuilt_libraries](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) + +Server: +```bash +# Download demo code +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving + +# Download PP_LiteSeg model +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz + +# Launch server, change the configurations in server.py to select hardware, backend, etc. +# and use --host, --port to specify IP and port +fastdeploy simple_serving --app server:app +``` + +Client: +```bash +# Download demo code +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving + +# Download test image +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +# Send request and get inference result (Please adapt the IP and port if necessary) +python client.py +``` diff --git a/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md new file mode 100644 index 000000000..d12bb9f2e --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md @@ -0,0 +1,32 @@ +简体中文 | [English](README.md) + +# PaddleSeg Python轻量服务化部署示例 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) + +服务端: +```bash +# 下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving + +# 下载PP-LiteSeg模型文件 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz + +# 启动服务,可修改server.py中的配置项来指定硬件、后端等 +# 可通过--host、--port指定IP和端口号 +fastdeploy simple_serving --app server:app +``` + +客户端: +```bash +# 下载测试图片 +wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg + +# 请求服务,获取推理结果(如有必要,请修改脚本中的IP和端口号) +python client.py +``` diff --git a/examples/vision/segmentation/paddleseg/serving/simple_serving/client.py b/examples/vision/segmentation/paddleseg/serving/simple_serving/client.py new file mode 100644 index 000000000..e652c4462 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/simple_serving/client.py @@ -0,0 +1,23 @@ +import requests +import json +import cv2 +import fastdeploy as fd +from fastdeploy.serving.utils import cv2_to_base64 + +if __name__ == '__main__': + url = "http://127.0.0.1:8000/fd/ppliteseg" + headers = {"Content-Type": "application/json"} + + im = cv2.imread("cityscapes_demo.png") + data = {"data": {"image": cv2_to_base64(im)}, "parameters": {}} + + resp = requests.post(url=url, headers=headers, data=json.dumps(data)) + if resp.status_code == 200: + r_json = json.loads(resp.json()["result"]) + result = fd.vision.utils.json_to_segmentation(r_json) + vis_im = fd.vision.vis_segmentation(im, result, weight=0.5) + cv2.imwrite("visualized_result.jpg", vis_im) + print("Visualized result save in 
./visualized_result.jpg") + else: + print("Error code:", resp.status_code) + print(resp.text) diff --git a/examples/vision/segmentation/paddleseg/serving/simple_serving/server.py b/examples/vision/segmentation/paddleseg/serving/simple_serving/server.py new file mode 100644 index 000000000..2ae2df09c --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/simple_serving/server.py @@ -0,0 +1,38 @@ +import fastdeploy as fd +from fastdeploy.serving.server import SimpleServer +import os +import logging + +logging.getLogger().setLevel(logging.INFO) + +# Configurations +model_dir = 'PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer' +device = 'cpu' +use_trt = False + +# Prepare model +model_file = os.path.join(model_dir, "model.pdmodel") +params_file = os.path.join(model_dir, "model.pdiparams") +config_file = os.path.join(model_dir, "deploy.yaml") + +# Setup runtime option to select hardware, backend, etc. +option = fd.RuntimeOption() +if device.lower() == 'gpu': + option.use_gpu() +if use_trt: + option.use_trt_backend() + option.set_trt_cache_file('pp_lite_seg.trt') + +# Create model instance +model_instance = fd.vision.segmentation.PaddleSegModel( + model_file=model_file, + params_file=params_file, + config_file=config_file, + runtime_option=option) + +# Create server, setup REST API +app = SimpleServer() +app.register( + task_name="fd/ppliteseg", + model_handler=fd.serving.handler.VisionModelHandler, + predictor=model_instance) diff --git a/examples/vision/segmentation/paddleseg/sophgo/README.md b/examples/vision/segmentation/paddleseg/sophgo/README.md index 85a4360fa..337507b49 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/README.md +++ b/examples/vision/segmentation/paddleseg/sophgo/README.md @@ -18,7 +18,14 @@ Here we take [PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.c ### Download PP-LiteSeg-B(STDC2)-cityscapes-without-argmax, and convert it to ONNX ```shell -https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +# Download Paddle2ONNX repository. +git clone https://github.com/PaddlePaddle/Paddle2ONNX + +# Download the Paddle static map model and fix the input shape. +## Go to the directory where the input shape is fixed for the Paddle static map model. +cd Paddle2ONNX/tools/paddle + +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz tar xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz # Modify the input shape of PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer model from dynamic input to constant input. 
diff --git a/examples/vision/segmentation/paddleseg/sophgo/README_CN.md b/examples/vision/segmentation/paddleseg/sophgo/README_CN.md index 5961d2e94..566691889 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/README_CN.md +++ b/examples/vision/segmentation/paddleseg/sophgo/README_CN.md @@ -18,7 +18,14 @@ SOPHGO-TPU部署模型前需要将Paddle模型转换成bmodel模型,具体步 ### 下载PP-LiteSeg-B(STDC2)-cityscapes-without-argmax模型,并转换为ONNX模型 ```shell -https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +# 下载Paddle2ONNX仓库 +git clone https://github.com/PaddlePaddle/Paddle2ONNX + +# 下载Paddle静态图模型并为Paddle静态图模型固定输入shape +## 进入为Paddle静态图模型固定输入shape的目录 +cd Paddle2ONNX/tools/paddle + +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz tar xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz # 修改PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer模型的输入shape,由动态输入变成固定输入 From bc3a7ce8fe093482fc61f15acd616b4be2fa0b9d Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 8 Feb 2023 03:12:50 +0000 Subject: [PATCH 02/41] Update paddleseg doc --- .../segmentation/paddleseg/README_CN.md | 2 +- .../paddleseg/amlogic/a311d/README_CN.md | 4 +- .../paddleseg/android/README_CN.md | 4 +- .../paddleseg/ascend/README_CN.md | 48 +++++++ .../{quantize => ascend}/cpp/CMakeLists.txt | 2 +- .../paddleseg/ascend/cpp/README.md | 96 +++++++++++++ .../paddleseg/ascend/cpp/README_CN.md | 88 ++++++++++++ .../{quantize => ascend}/cpp/infer.cc | 52 +++---- .../paddleseg/ascend/python/README.md | 82 +++++++++++ .../paddleseg/ascend/python/README_CN.md | 79 ++++++++++ .../paddleseg/ascend/python/infer.py | 34 +++++ .../paddleseg/cpu-gpu/README_CN.md | 4 +- .../paddleseg/cpu-gpu/cpp/README_CN.md | 2 +- .../paddleseg/cpu-gpu/python/README.md | 2 +- .../paddleseg/cpu-gpu/python/README_CN.md | 2 +- .../paddleseg/kunlun/README_CN.md | 2 +- .../paddleseg/kunlun/cpp/README_CN.md | 57 +++----- .../paddleseg/kunlun/cpp/infer.cc | 136 +----------------- .../paddleseg/kunlun/python/README.md | 2 +- .../paddleseg/kunlun/python/README_CN.md | 39 +++-- .../paddleseg/kunlun/python/infer.py | 33 +---- .../paddleseg/kunlun/python/serving/README.md | 36 ----- .../kunlun/python/serving/README_CN.md | 36 ----- .../paddleseg/kunlun/python/serving/client.py | 23 --- .../paddleseg/kunlun/python/serving/server.py | 38 ----- .../paddleseg/quantize/cpp/README.md | 32 ----- .../paddleseg/quantize/cpp/README_CN.md | 32 ----- .../paddleseg/quantize/python/README.md | 29 ---- .../paddleseg/quantize/python/README_CN.md | 29 ---- .../paddleseg/quantize/python/infer.py | 76 ---------- .../paddleseg/rockchip/rknpu2/README_CN.md | 31 +++- .../rockchip/rknpu2/cpp/README_CN.md | 4 +- .../paddleseg/rockchip/rknpu2/pp_humanseg.md | 2 +- .../rockchip/rknpu2/python/README_CN.md | 8 +- .../paddleseg/rockchip/rv1126/README_CN.md | 16 ++- .../rockchip/rv1126/cpp/README_CN.md | 12 +- .../paddleseg/sophgo/README_CN.md | 13 +- .../paddleseg/sophgo/cpp/README_CN.md | 4 +- .../paddleseg/sophgo/python/README_CN.md | 2 +- .../segmentation/paddleseg/web/README_CN.md | 2 +- 40 files changed, 570 insertions(+), 625 deletions(-) create mode 100644 examples/vision/segmentation/paddleseg/ascend/README_CN.md rename examples/vision/segmentation/paddleseg/{quantize => ascend}/cpp/CMakeLists.txt (91%) create mode 100755 examples/vision/segmentation/paddleseg/ascend/cpp/README.md create mode 100644 examples/vision/segmentation/paddleseg/ascend/cpp/README_CN.md rename 
examples/vision/segmentation/paddleseg/{quantize => ascend}/cpp/infer.cc (55%) create mode 100755 examples/vision/segmentation/paddleseg/ascend/python/README.md create mode 100644 examples/vision/segmentation/paddleseg/ascend/python/README_CN.md create mode 100755 examples/vision/segmentation/paddleseg/ascend/python/infer.py delete mode 100644 examples/vision/segmentation/paddleseg/kunlun/python/serving/README.md delete mode 100644 examples/vision/segmentation/paddleseg/kunlun/python/serving/README_CN.md delete mode 100644 examples/vision/segmentation/paddleseg/kunlun/python/serving/client.py delete mode 100644 examples/vision/segmentation/paddleseg/kunlun/python/serving/server.py delete mode 100755 examples/vision/segmentation/paddleseg/quantize/cpp/README.md delete mode 100644 examples/vision/segmentation/paddleseg/quantize/cpp/README_CN.md delete mode 100755 examples/vision/segmentation/paddleseg/quantize/python/README.md delete mode 100644 examples/vision/segmentation/paddleseg/quantize/python/README_CN.md delete mode 100644 examples/vision/segmentation/paddleseg/quantize/python/infer.py diff --git a/examples/vision/segmentation/paddleseg/README_CN.md b/examples/vision/segmentation/paddleseg/README_CN.md index 28bdce086..865c6c4aa 100644 --- a/examples/vision/segmentation/paddleseg/README_CN.md +++ b/examples/vision/segmentation/paddleseg/README_CN.md @@ -6,7 +6,7 @@ FastDeploy是一款全场景、易用灵活、极致高效的AI推理部署工 ## 详细文档 -- [NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU](cpu-gpu) +- [NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)](cpu-gpu) - [昆仑](kunlun) - [升腾](ascend) - [瑞芯微](rockchip) diff --git a/examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md index ccb999450..3537dfef1 100644 --- a/examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md +++ b/examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md @@ -6,7 +6,9 @@ 由于晶晨A311D的NPU仅支持INT8量化模型的部署,因此所支持的量化模型如下: - [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) -为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型,开发者可直接下载使用。 +为了方便开发者的测试,下面提供了PaddleSeg导出的部分推理模型,开发者可直接下载使用。 + +PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) | 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | |:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | diff --git a/examples/vision/segmentation/paddleseg/android/README_CN.md b/examples/vision/segmentation/paddleseg/android/README_CN.md index eb683bdfa..442947009 100644 --- a/examples/vision/segmentation/paddleseg/android/README_CN.md +++ b/examples/vision/segmentation/paddleseg/android/README_CN.md @@ -173,5 +173,5 @@ model.init(modelFile, paramFile, configFile, option); ## 更多参考文档 如果您想知道更多的FastDeploy Java API文档以及如何通过JNI来接入FastDeploy C++ API感兴趣,可以参考以下内容: -- [在 Android 中使用 FastDeploy Java SDK](../../../../../java/android/) -- [在 Android 中使用 FastDeploy C++ SDK](../../../../../docs/cn/faq/use_cpp_sdk_on_android.md) +- [在 Android 中使用 FastDeploy Java SDK](https://github.com/PaddlePaddle/FastDeploy/tree/develop/java/android) +- [在 Android 中使用 FastDeploy C++ SDK](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_cpp_sdk_on_android.md) diff --git a/examples/vision/segmentation/paddleseg/ascend/README_CN.md b/examples/vision/segmentation/paddleseg/ascend/README_CN.md new file mode 100644 index 000000000..fb29615fe --- /dev/null +++ 
b/examples/vision/segmentation/paddleseg/ascend/README_CN.md @@ -0,0 +1,48 @@ +# 使用FastDeploy部署PaddleSeg模型 + +FastDeploy支持在华为昇腾上部署PaddleSeg模型 + +## 模型版本说明 + +- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) + +目前FastDeploy支持如下模型的部署 + +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) +- [PP-HumanSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README.md) +- [FCN系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/fcn/README.md) +- [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) +- [SegFormer系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/segformer/README.md) + +>>**注意** 若需要在华为昇腾上部署**PP-Matting**、**PP-HumanMatting**请从[Matting模型部署](../../matting/)下载对应模型,部署过程与此文档一致 + +## 准备PaddleSeg部署模型 +PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) + +**注意** +- PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 + +## 下载预训练模型 + +为了方便开发者的测试,下面提供了PaddleSeg导出的部分推理模型模型 +- without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` +- with-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op argmax` + +开发者可直接下载使用。 + +| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | +| [PP-LiteSeg-B(STDC2)-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz) \| [PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz) | 31MB | 1024x512 | 79.04% | 79.52% | 79.85% | +|[PP-HumanSegV1-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV1_Lite_with_argmax_infer.tgz) \| [PP-HumanSegV1-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) | 543KB | 192x192 | 86.2% | - | - | +|[PP-HumanSegV2-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_infer.tgz) | 12MB | 192x192 | 92.52% | - | - | +| [PP-HumanSegV2-Mobile-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Mobile-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) | 29MB | 192x192 | 93.13% | - | - | +|[PP-HumanSegV1-Server-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_with_argmax_infer.tgz) \| [PP-HumanSegV1-Server-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) | 103MB | 512x512 | 96.47% | - | - | +| [Portait-PP-HumanSegV2-Lite-with-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer.tgz) \| [Portait-PP-HumanSegV2-Lite-without-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) | 3.6M | 256x144 | 96.63% | - | - | +| [FCN-HRNet-W18-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_with_argmax_infer.tgz) \| 
[FCN-HRNet-W18-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz)(暂时不支持ONNXRuntime的GPU推理) | 37MB | 1024x512 | 78.97% | 79.49% | 79.74% | +| [Deeplabv3-ResNet101-OS8-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer.tgz) \| [Deeplabv3-ResNet101-OS8-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) | 150MB | 1024x512 | 79.90% | 80.22% | 80.47% | +| [SegFormer_B0-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/SegFormer_B0-cityscapes-with-argmax.tgz) \| [SegFormer_B0-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/SegFormer_B0-cityscapes-without-argmax.tgz) | 15MB | 1024x1024 | 76.73% | 77.16% | - | + +## 详细部署文档 + +- [Python部署](python) +- [C++部署](cpp) diff --git a/examples/vision/segmentation/paddleseg/quantize/cpp/CMakeLists.txt b/examples/vision/segmentation/paddleseg/ascend/cpp/CMakeLists.txt similarity index 91% rename from examples/vision/segmentation/paddleseg/quantize/cpp/CMakeLists.txt rename to examples/vision/segmentation/paddleseg/ascend/cpp/CMakeLists.txt index fea1a2888..93540a7e8 100644 --- a/examples/vision/segmentation/paddleseg/quantize/cpp/CMakeLists.txt +++ b/examples/vision/segmentation/paddleseg/ascend/cpp/CMakeLists.txt @@ -1,5 +1,5 @@ PROJECT(infer_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.12) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) # 指定下载解压后的fastdeploy库路径 option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") diff --git a/examples/vision/segmentation/paddleseg/ascend/cpp/README.md b/examples/vision/segmentation/paddleseg/ascend/cpp/README.md new file mode 100755 index 000000000..bcccdd1cb --- /dev/null +++ b/examples/vision/segmentation/paddleseg/ascend/cpp/README.md @@ -0,0 +1,96 @@ +English | [简体中文](README_CN.md) +# PaddleSeg C++ Deployment Example + +This directory provides examples that `infer.cc` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. + +Before deployment, two steps require confirmation + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) + +Taking the inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 1.0.0 or above (x.x.x>=1.0.0) is required to support this model. + +```bash +mkdir build +cd build +# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# Download Unet model files and test images +wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz +tar -xvf Unet_cityscapes_without_argmax_infer.tgz +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + + +# CPU inference +./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 0 +# GPU inference +./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 1 +# TensorRT inference on GPU +./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 2 +# kunlunxin XPU inference +./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 3 +``` + +The visualized result after running is as follows +
+ +
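+
+Since this example lives under the Ascend directory, you may want to select the Ascend backend explicitly when calling the model from your own code. The snippet below is a minimal sketch that mirrors the `AscendInfer` flow in the accompanying `infer.cc` (it relies on `RuntimeOption::UseAscend()` as used there, and the model/image paths are placeholders):
+
+```c++
+// Minimal sketch: run a PaddleSeg model on a Huawei Ascend NPU and save the
+// visualized result, following the accompanying infer.cc.
+#include <iostream>
+
+#include "fastdeploy/vision.h"
+
+int main() {
+  fastdeploy::RuntimeOption option;
+  option.UseAscend();  // select the Huawei Ascend backend, as in infer.cc
+
+  auto model = fastdeploy::vision::segmentation::PaddleSegModel(
+      "PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer/model.pdmodel",
+      "PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer/model.pdiparams",
+      "PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer/deploy.yaml", option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return -1;
+  }
+
+  auto im = cv::imread("cityscapes_demo.png");
+  fastdeploy::vision::SegmentationResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return -1;
+  }
+  auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);  // same output name as the demo
+  return 0;
+}
+```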
+ +The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to: +- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +## PaddleSeg C++ Interface + +### PaddleSeg Class + +```c++ +fastdeploy::vision::segmentation::PaddleSegModel( + const string& model_file, + const string& params_file = "", + const string& config_file, + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) +``` + +PaddleSegModel model loading and initialization, among which model_file is the exported Paddle model format. + +**Parameter** + +> * **model_file**(str): Model file path +> * **params_file**(str): Parameter file path +> * **config_file**(str): Inference deployment configuration file +> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration +> * **model_format**(ModelFormat): Model format. Paddle format by default + +#### Predict Function + +> ```c++ +> PaddleSegModel::Predict(cv::Mat* im, DetectionResult* result) +> ``` +> +> Model prediction interface. Input images and output detection results. +> +> **Parameter** +> +> > * **im**: Input images in HWC or BGR format +> > * **result**: The segmentation result, including the predicted label of the segmentation and the corresponding probability of the label. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of SegmentationResult + +### Class Member Variable +#### Pre-processing Parameter +Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results + +> > * **is_vertical_screen**(bool): For PP-HumanSeg models, the input image is portrait, height greater than a width, by setting this parameter to`true` + +#### Post-processing Parameter +> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported. Set this parameter to `true` to normalize the probability result (score_map) of the predicted output segmentation label (label_map) + +- [Model Description](../../) +- [Python Deployment](../python) +- [Vision Model Prediction Results](../../../../../docs/api/vision_results/) +- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/paddleseg/ascend/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/ascend/cpp/README_CN.md new file mode 100644 index 000000000..38692dc26 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/ascend/cpp/README_CN.md @@ -0,0 +1,88 @@ +[English](README.md) | 简体中文 +# PaddleSeg C++部署示例 + +本目录下提供`infer.cc`快速完成PP-LiteSeg在华为昇腾上部署的示例。 + +在部署前,需自行编译基于华为昇腾NPU的预测库,参考文档[华为昇腾NPU部署环境编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/huawei_ascend.md) + +>>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../matting)下载 + +```bash +#下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/ascend/cpp + +mkdir build +cd build +# 使用编译完成的FastDeploy库编译infer_demo +cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-ascend +make -j + +# 下载PP-LiteSeg模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +# 华为昇腾推理 +./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png +``` + +运行完成可视化结果如下图所示 +
+ +## PaddleSeg C++接口 + +### PaddleSeg类 + +```c++ +fastdeploy::vision::segmentation::PaddleSegModel( + const string& model_file, + const string& params_file = "", + const string& config_file, + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) +``` + +PaddleSegModel模型加载和初始化,其中model_file为导出的Paddle模型格式。 + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **config_file**(str): 推理部署配置文件 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + +#### Predict函数 + +> ```c++ +> PaddleSegModel::Predict(cv::Mat* im, DetectionResult* result) +> ``` +> +> 模型预测接口,输入图像直接输出检测结果。 +> +> **参数** +> +> > * **im**: 输入图像,注意需为HWC,BGR格式 +> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, SegmentationResult结构体说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) + +### 类成员属性 +#### 预处理参数 +用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 + +> > * **is_vertical_screen**(bool): PP-HumanSeg系列模型通过设置此参数为`true`表明输入图片是竖屏,即height大于width的图片 + +#### 后处理参数 +> > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 + +## 快速链接 +- [PaddleSeg模型介绍](../../) +- [Python部署](../python) + +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) +- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +) diff --git a/examples/vision/segmentation/paddleseg/quantize/cpp/infer.cc b/examples/vision/segmentation/paddleseg/ascend/cpp/infer.cc similarity index 55% rename from examples/vision/segmentation/paddleseg/quantize/cpp/infer.cc rename to examples/vision/segmentation/paddleseg/ascend/cpp/infer.cc index 158a30263..cf98dae4e 100644 --- a/examples/vision/segmentation/paddleseg/quantize/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/ascend/cpp/infer.cc @@ -13,25 +13,28 @@ // limitations under the License. #include "fastdeploy/vision.h" + #ifdef WIN32 const char sep = '\\'; #else const char sep = '/'; #endif -void InitAndInfer(const std::string& model_dir, const std::string& image_file, - const fastdeploy::RuntimeOption& option) { +void AscendInfer(const std::string& model_dir, const std::string& image_file) { auto model_file = model_dir + sep + "model.pdmodel"; auto params_file = model_dir + sep + "model.pdiparams"; auto config_file = model_dir + sep + "deploy.yaml"; - + auto option = fastdeploy::RuntimeOption(); + option.UseAscend(); auto model = fastdeploy::vision::segmentation::PaddleSegModel( - model_file, params_file, config_file,option); + model_file, params_file, config_file, option); - assert(model.Initialized()); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." 
<< std::endl; + return; + } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::SegmentationResult res; if (!model.Predict(im, &res)) { @@ -40,37 +43,20 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, } std::cout << res.Str() << std::endl; - + auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } - int main(int argc, char* argv[]) { - if (argc < 4) { - std::cout << "Usage: infer_demo path/to/quant_model " - "path/to/image " - "run_option, " - "e.g ./infer_demo ./ResNet50_vd_quant ./test.jpeg 0" - << std::endl; - std::cout << "The data type of run_option is int, 0: run on cpu with ORT " - "backend; 1: run " - "on gpu with TensorRT backend. " - << std::endl; + if (argc < 3) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./ppseg_model_dir ./test.jpeg" + << std::endl; return -1; } - fastdeploy::RuntimeOption option; - int flag = std::atoi(argv[3]); - - if (flag == 0) { - option.UseCpu(); - option.UseOrtBackend(); - } else if (flag == 1) { - option.UseCpu(); - option.UsePaddleInferBackend(); - } - - std::string model_dir = argv[1]; - std::string test_image = argv[2]; - InitAndInfer(model_dir, test_image, option); + AscendInfer(argv[1], argv[2]); return 0; -} \ No newline at end of file +} diff --git a/examples/vision/segmentation/paddleseg/ascend/python/README.md b/examples/vision/segmentation/paddleseg/ascend/python/README.md new file mode 100755 index 000000000..d37d92c9e --- /dev/null +++ b/examples/vision/segmentation/paddleseg/ascend/python/README.md @@ -0,0 +1,82 @@ +English | [简体中文](README_CN.md) +# PaddleSeg Python Deployment Example + +Before deployment, two steps require confirmation + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) + +This directory provides examples that `infer.py` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. The script is as follows +```bash +# Download the deployment example code +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/python + +# Download Unet model files and test images +wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz +tar -xvf Unet_cityscapes_without_argmax_infer.tgz +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +# CPU inference +python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device cpu +# GPU inference +python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu +# TensorRT inference on GPU(Attention: It is somewhat time-consuming for the operation of model serialization when running TensorRT inference for the first time. Please be patient.) 
+python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu --use_trt True +# KunlunXin XPU inference +python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device kunlunxin +``` + +The visualized result after running is as follows +<div width="1240">
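For reference, the `--device` and `--use_trt` flags above are thin wrappers around FastDeploy's `RuntimeOption`. The sketch below illustrates how such a demo typically builds the option; it reuses only calls that appear elsewhere in this patch (`use_gpu`, `use_kunlunxin`, `use_trt_backend`, `set_trt_input_shape`), and the dynamic shapes are illustrative rather than a definitive copy of `infer.py`.

```python
import fastdeploy as fd


def build_option(device="cpu", use_trt=False):
    # Map the demo's command-line flags onto a FastDeploy RuntimeOption.
    option = fd.RuntimeOption()
    if device == "gpu":
        option.use_gpu()
    elif device == "kunlunxin":
        option.use_kunlunxin()
    if use_trt:
        # The first TensorRT run serializes the engine, which is why it is slow;
        # the dynamic input shape below mirrors the other PaddleSeg examples.
        option.use_trt_backend()
        option.set_trt_input_shape("x", [1, 3, 256, 256], [1, 3, 1024, 1024],
                                   [1, 3, 2048, 2048])
    return option
```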
+ +## PaddleSegModel Python Interface + +```python +fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +``` + +PaddleSeg model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) for more information + +**Parameter** + +> * **model_file**(str): Model file path +> * **params_file**(str): Parameter file path +> * **config_file**(str): Inference deployment configuration file +> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration +> * **model_format**(ModelFormat): Model format. Paddle format by default + +### predict function + +> ```python +> PaddleSegModel.predict(input_image) +> ``` +> +> Model prediction interface. Input images and output detection results. +> +> **Parameter** +> +> > * **input_image**(np.ndarray): Input data in HWC or BGR format + +> **Return** +> +> > Return `fastdeploy.vision.SegmentationResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of the structure. + +### Class Member Variable +#### Pre-processing Parameter +Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results + +> > * **is_vertical_screen**(bool): For PP-HumanSeg models, the input image is portrait with height greater than width by setting this parameter to `true` +#### Post-processing Parameter +> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported. Set this parameter to `true` to normalize the probability result (score_map) of the predicted output segmentation label (label_map) in softmax + +## Other Documents + +- [PaddleSeg Model Description](..) +- [PaddleSeg C++ Deployment](../cpp) +- [Model Prediction Results](../../../../../docs/api/vision_results/) +- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/paddleseg/ascend/python/README_CN.md b/examples/vision/segmentation/paddleseg/ascend/python/README_CN.md new file mode 100644 index 000000000..909784fd3 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/ascend/python/README_CN.md @@ -0,0 +1,79 @@ +[English](README.md) | 简体中文 +# PaddleSeg Python部署示例 + +本目录下提供`infer.py`快速完成PP-LiteSeg在华为昇腾上部署的示例。 + +在部署前,需自行编译基于华为昇腾NPU的FastDeploy python wheel包,参考文档[华为昇腾NPU部署环境编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/huawei_ascend.md),编译python wheel包并安装 + +>>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../matting)下载 + + +```bash +#下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/ascend/cpp + +# 下载PP-LiteSeg模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +# 华为昇腾推理 +python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png +``` + +运行完成可视化结果如下图所示 +
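For readers who want the script's core logic without opening the file, a condensed sketch of the Ascend flow follows. It mirrors the `infer.py` added later in this patch; the model directory and image name are the ones downloaded in the commands above.

```python
import os

import cv2
import fastdeploy as fd

# Select the Huawei Ascend NPU backend (mirrors the infer.py shipped with this example).
option = fd.RuntimeOption()
option.use_ascend()

model_dir = "PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer"
model = fd.vision.segmentation.PaddleSegModel(
    os.path.join(model_dir, "model.pdmodel"),
    os.path.join(model_dir, "model.pdiparams"),
    os.path.join(model_dir, "deploy.yaml"),
    runtime_option=option)

# Run prediction on the demo image and save a blended visualization.
im = cv2.imread("cityscapes_demo.png")
result = model.predict(im)
vis_im = fd.vision.vis_segmentation(im, result, weight=0.5)
cv2.imwrite("vis_result.png", vis_im)
```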
+ +## PaddleSegModel Python接口 + +```python +fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +``` + +PaddleSeg模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **config_file**(str): 推理部署配置文件 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + +### predict函数 + +> ```python +> PaddleSegModel.predict(input_image) +> ``` +> +> 模型预测结口,输入图像直接输出检测结果。 +> +> **参数** +> +> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 + +> **返回** +> +> > 返回`fastdeploy.vision.SegmentationResult`结构体,SegmentationResult结构体说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) + +### 类成员属性 +#### 预处理参数 +用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 + +> > * **is_vertical_screen**(bool): PP-HumanSeg系列模型通过设置此参数为`true`表明输入图片是竖屏,即height大于width的图片 + +#### 后处理参数 +> > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 + +## 快速链接 + +- [PaddleSeg 模型介绍](..) +- [PaddleSeg C++部署](../cpp) + +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) +- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) +- [PaddleSeg python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/semantic_segmentation.html) diff --git a/examples/vision/segmentation/paddleseg/ascend/python/infer.py b/examples/vision/segmentation/paddleseg/ascend/python/infer.py new file mode 100755 index 000000000..180f30e80 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/ascend/python/infer.py @@ -0,0 +1,34 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", required=True, help="Path of PaddleSeg model.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + return parser.parse_args() + + +runtime_option = fd.RuntimeOption() +runtime_option.use_ascend() + +# 配置runtime,加载模型 +model_file = os.path.join(args.model, "model.pdmodel") +params_file = os.path.join(args.model, "model.pdiparams") +config_file = os.path.join(args.model, "deploy.yaml") +model = fd.vision.segmentation.PaddleSegModel( + model_file, params_file, config_file, runtime_option=runtime_option) + +# 预测图片分割结果 +im = cv2.imread(args.image) +result = model.predict(im) +print(result) + +# 可视化结果 +vis_im = fd.vision.vis_segmentation(im, result, weight=0.5) +cv2.imwrite("vis_img.png", vis_im) diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md b/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md index a4ff4af5e..0109ac01a 100644 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md @@ -1,5 +1,7 @@ # 使用FastDeploy部署PaddleSeg模型 +FastDeploy支持在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)硬件上部署PaddleSeg模型 + ## 模型版本说明 - [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) @@ -13,7 +15,7 @@ - 
[DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) - [SegFormer系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/segformer/README.md) -【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../matting/) +>>**注意**】如部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../matting/) ## 准备PaddleSeg部署模型 PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md index c5d39934b..7618e6f15 100644 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md @@ -82,7 +82,7 @@ PaddleSegModel模型加载和初始化,其中model_file为导出的Paddle模 > **参数** > > > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, SegmentationResult说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) +> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, SegmentationResult结构体说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) ### 类成员属性 #### 预处理参数 diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md index add5b053d..d37d92c9e 100755 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md @@ -40,7 +40,7 @@ The visualized result after running is as follows fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) ``` -PaddleSeg model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/docs/model_export_cn.md) for more information +PaddleSeg model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. 
Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) for more information **Parameter** diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md index 1e31bd014..da23623b2 100644 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md @@ -39,7 +39,7 @@ python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --ima fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) ``` -PaddleSeg模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/docs/model_export_cn.md) +PaddleSeg模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) **参数** diff --git a/examples/vision/segmentation/paddleseg/kunlun/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/README_CN.md index a4ff4af5e..5fba79c12 100644 --- a/examples/vision/segmentation/paddleseg/kunlun/README_CN.md +++ b/examples/vision/segmentation/paddleseg/kunlun/README_CN.md @@ -13,7 +13,7 @@ - [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) - [SegFormer系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/segformer/README.md) -【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../matting/) +>>**注意** 若需要在华为昇腾上部署**PP-Matting**、**PP-HumanMatting**请从[Matting模型部署](../../matting/)下载对应模型,部署过程与此文档一致 ## 准备PaddleSeg部署模型 PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) diff --git a/examples/vision/segmentation/paddleseg/kunlun/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/cpp/README_CN.md index df99e324e..55c6996fc 100644 --- a/examples/vision/segmentation/paddleseg/kunlun/cpp/README_CN.md +++ b/examples/vision/segmentation/paddleseg/kunlun/cpp/README_CN.md @@ -1,42 +1,30 @@ [English](README.md) | 简体中文 # PaddleSeg C++部署示例 -本目录下提供`infer.cc`快速完成Unet在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。 +本目录下提供`infer.cc`快速完成PP-LiteSeg在华为昇腾上部署的示例。 -在部署前,需确认以下两个步骤 +在部署前,需自行编译基于昆仑芯XPU的预测库,参考文档[昆仑芯XPU部署环境编译安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/kunlunxin.md) -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../matting) - -以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.0以上(x.x.x>=1.0.0) +>>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../matting)下载 ```bash +#下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/ascend/cpp + mkdir build cd build -# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +# 使用编译完成的FastDeploy库编译infer_demo +cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-ascend make -j -# 下载Unet模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz -tar -xvf Unet_cityscapes_without_argmax_infer.tgz +# 下载PP-LiteSeg模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# CPU推理 -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 0 -# GPU推理 -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 1 -# GPU上TensorRT推理 -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 2 -# 昆仑芯XPU推理 -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 3 # 华为昇腾推理 -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 4 +./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png ``` 运行完成可视化结果如下图所示 @@ -44,12 +32,6 @@ wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png -以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: -- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) - -如果用户使用华为昇腾NPU部署, 请参考以下方式在部署前初始化部署环境: -- [如何使用华为昇腾NPU部署](../../../../../docs/cn/faq/use_sdk_on_ascend.md) - ## PaddleSeg C++接口 ### PaddleSeg类 @@ -84,7 +66,7 @@ PaddleSegModel模型加载和初始化,其中model_file为导出的Paddle模 > **参数** > > > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, SegmentationResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/) +> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, SegmentationResult结构体说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) ### 类成员属性 #### 预处理参数 @@ -95,7 +77,12 @@ PaddleSegModel模型加载和初始化,其中model_file为导出的Paddle模 #### 后处理参数 > > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 -- [模型介绍](../../) +## 快速链接 +- [PaddleSeg模型介绍](../../) - [Python部署](../python) -- [视觉模型预测结果](../../../../../docs/api/vision_results/) -- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) + +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) +- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +) diff --git a/examples/vision/segmentation/paddleseg/kunlun/cpp/infer.cc b/examples/vision/segmentation/paddleseg/kunlun/cpp/infer.cc index e4d6e39d9..c695cd732 100644 --- a/examples/vision/segmentation/paddleseg/kunlun/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/kunlun/cpp/infer.cc @@ -20,34 +20,6 @@ const char sep = '\\'; const char sep = '/'; #endif -void CpuInfer(const std::string& model_dir, const std::string& image_file) { - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "deploy.yaml"; - auto option = fastdeploy::RuntimeOption(); - option.UseCpu(); - auto model = fastdeploy::vision::segmentation::PaddleSegModel( - model_file, params_file, config_file, option); - - if (!model.Initialized()) { - std::cerr << "Failed to initialize." 
<< std::endl; - return; - } - - auto im = cv::imread(image_file); - - fastdeploy::vision::SegmentationResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return; - } - - std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; -} - void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) { auto model_file = model_dir + sep + "model.pdmodel"; @@ -77,116 +49,14 @@ void KunlunXinInfer(const std::string& model_dir, std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } -void GpuInfer(const std::string& model_dir, const std::string& image_file) { - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "deploy.yaml"; - - auto option = fastdeploy::RuntimeOption(); - option.UseGpu(); - auto model = fastdeploy::vision::segmentation::PaddleSegModel( - model_file, params_file, config_file, option); - - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return; - } - - auto im = cv::imread(image_file); - - fastdeploy::vision::SegmentationResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return; - } - - std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; -} - -void TrtInfer(const std::string& model_dir, const std::string& image_file) { - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "deploy.yaml"; - - auto option = fastdeploy::RuntimeOption(); - option.UseGpu(); - option.UseTrtBackend(); - auto model = fastdeploy::vision::segmentation::PaddleSegModel( - model_file, params_file, config_file, option); - - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return; - } - - auto im = cv::imread(image_file); - - fastdeploy::vision::SegmentationResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return; - } - - std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; -} - -void AscendInfer(const std::string& model_dir, const std::string& image_file) { - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "deploy.yaml"; - auto option = fastdeploy::RuntimeOption(); - option.UseAscend(); - auto model = fastdeploy::vision::segmentation::PaddleSegModel( - model_file, params_file, config_file, option); - - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return; - } - - auto im = cv::imread(image_file); - - fastdeploy::vision::SegmentationResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." 
<< std::endl; - return; - } - - std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisSegmentation(im, res, 0.5); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; -} - int main(int argc, char* argv[]) { - if (argc < 4) { + if (argc < 3) { std::cout << "Usage: infer_demo path/to/model_dir path/to/image run_option, " - "e.g ./infer_model ./ppseg_model_dir ./test.jpeg 0" + "e.g ./infer_model ./ppseg_model_dir ./test.jpeg" << std::endl; - std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " - "with gpu; 2: run with gpu and use tensorrt backend; 3: run " - "with kunlunxin." - << std::endl; return -1; } - - if (std::atoi(argv[3]) == 0) { - CpuInfer(argv[1], argv[2]); - } else if (std::atoi(argv[3]) == 1) { - GpuInfer(argv[1], argv[2]); - } else if (std::atoi(argv[3]) == 2) { - TrtInfer(argv[1], argv[2]); - } else if (std::atoi(argv[3]) == 3) { - KunlunXinInfer(argv[1], argv[2]); - } else if (std::atoi(argv[3]) == 4) { - AscendInfer(argv[1], argv[2]); - } + KunlunXinInfer(argv[1], argv[2]); return 0; } diff --git a/examples/vision/segmentation/paddleseg/kunlun/python/README.md b/examples/vision/segmentation/paddleseg/kunlun/python/README.md index add5b053d..d37d92c9e 100755 --- a/examples/vision/segmentation/paddleseg/kunlun/python/README.md +++ b/examples/vision/segmentation/paddleseg/kunlun/python/README.md @@ -40,7 +40,7 @@ The visualized result after running is as follows fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) ``` -PaddleSeg model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/docs/model_export_cn.md) for more information +PaddleSeg model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) for more information **Parameter** diff --git a/examples/vision/segmentation/paddleseg/kunlun/python/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/python/README_CN.md index 61edc5b2b..7ce98b44e 100644 --- a/examples/vision/segmentation/paddleseg/kunlun/python/README_CN.md +++ b/examples/vision/segmentation/paddleseg/kunlun/python/README_CN.md @@ -1,35 +1,25 @@ [English](README.md) | 简体中文 # PaddleSeg Python部署示例 -在部署前,需确认以下两个步骤 +本目录下提供`infer.py`快速完成PP-LiteSeg在华为昇腾上部署的示例。 -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. 
FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +在部署前,需自行编译基于昆仑芯XPU的FastDeploy wheel 包,参考文档[昆仑芯XPU部署环境编译安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/kunlunxin.md),编译python wheel包并安装 -【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../matting) +>>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../matting)下载 -本目录下提供`infer.py`快速完成Unet在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成 ```bash #下载部署示例代码 git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/python +cd FastDeploy/examples/vision/segmentation/paddleseg/ascend/cpp -# 下载Unet模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz -tar -xvf Unet_cityscapes_without_argmax_infer.tgz +# 下载PP-LiteSeg模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png -# CPU推理 -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device cpu -# GPU推理 -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu -# GPU上使用TensorRT推理 (注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待) -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu --use_trt True -# 昆仑芯XPU推理 -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device kunlunxin # 华为昇腾推理 -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device ascend +python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png ``` 运行完成可视化结果如下图所示 @@ -43,7 +33,7 @@ python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_ fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) ``` -PaddleSeg模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/docs/model_export_cn.md) +PaddleSeg模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) **参数** @@ -67,7 +57,7 @@ PaddleSeg模型加载和初始化,其中model_file, params_file以及config_fi > **返回** > -> > 返回`fastdeploy.vision.SegmentationResult`结构体,结构体说明参考文档[视觉模型预测结果](../../../../../docs/api/vision_results/) +> > 返回`fastdeploy.vision.SegmentationResult`结构体,SegmentationResult结构体说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) ### 类成员属性 #### 预处理参数 @@ -78,9 +68,12 @@ PaddleSeg模型加载和初始化,其中model_file, params_file以及config_fi #### 后处理参数 > > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 -## 其它文档 +## 快速链接 - [PaddleSeg 模型介绍](..) 
- [PaddleSeg C++部署](../cpp) -- [模型预测结果说明](../../../../../docs/api/vision_results/) -- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) + +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) +- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) +- [PaddleSeg python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/semantic_segmentation.html) diff --git a/examples/vision/segmentation/paddleseg/kunlun/python/infer.py b/examples/vision/segmentation/paddleseg/kunlun/python/infer.py index 6862330ed..bfbde415f 100755 --- a/examples/vision/segmentation/paddleseg/kunlun/python/infer.py +++ b/examples/vision/segmentation/paddleseg/kunlun/python/infer.py @@ -11,42 +11,13 @@ def parse_arguments(): "--model", required=True, help="Path of PaddleSeg model.") parser.add_argument( "--image", type=str, required=True, help="Path of test image file.") - parser.add_argument( - "--device", - type=str, - default='cpu', - help="Type of inference device, support 'kunlunxin', 'cpu' or 'gpu'.") - parser.add_argument( - "--use_trt", - type=ast.literal_eval, - default=False, - help="Wether to use tensorrt.") return parser.parse_args() -def build_option(args): - option = fd.RuntimeOption() - - if args.device.lower() == "gpu": - option.use_gpu() - - if args.device.lower() == "kunlunxin": - option.use_kunlunxin() - - if args.device.lower() == "ascend": - option.use_ascend() - - if args.use_trt: - option.use_trt_backend() - option.set_trt_input_shape("x", [1, 3, 256, 256], [1, 3, 1024, 1024], - [1, 3, 2048, 2048]) - return option - - -args = parse_arguments() +runtime_option = fd.RuntimeOption() +runtime_option.use_kunlunxin() # 配置runtime,加载模型 -runtime_option = build_option(args) model_file = os.path.join(args.model, "model.pdmodel") params_file = os.path.join(args.model, "model.pdiparams") config_file = os.path.join(args.model, "deploy.yaml") diff --git a/examples/vision/segmentation/paddleseg/kunlun/python/serving/README.md b/examples/vision/segmentation/paddleseg/kunlun/python/serving/README.md deleted file mode 100644 index da41a3a00..000000000 --- a/examples/vision/segmentation/paddleseg/kunlun/python/serving/README.md +++ /dev/null @@ -1,36 +0,0 @@ -English | [简体中文](README_CN.md) - -# PaddleSegmentation Python Simple Serving Demo - - -## Environment - -- 1. Prepare environment and install FastDeploy Python whl, refer to [download_prebuilt_libraries](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) - -Server: -```bash -# Download demo code -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving - -# Download PP_LiteSeg model -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz -tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz - -# Launch server, change the configurations in server.py to select hardware, backend, etc. 
-# and use --host, --port to specify IP and port -fastdeploy simple_serving --app server:app -``` - -Client: -```bash -# Download demo code -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving - -# Download test image -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# Send request and get inference result (Please adapt the IP and port if necessary) -python client.py -``` diff --git a/examples/vision/segmentation/paddleseg/kunlun/python/serving/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/python/serving/README_CN.md deleted file mode 100644 index 3f382c904..000000000 --- a/examples/vision/segmentation/paddleseg/kunlun/python/serving/README_CN.md +++ /dev/null @@ -1,36 +0,0 @@ -简体中文 | [English](README.md) - -# PaddleSegmentation Python轻量服务化部署示例 - -在部署前,需确认以下两个步骤 - -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -服务端: -```bash -# 下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving - -# 下载PP_LiteSeg模型文件 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz -tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz - -# 启动服务,可修改server.py中的配置项来指定硬件、后端等 -# 可通过--host、--port指定IP和端口号 -fastdeploy simple_serving --app server:app -``` - -客户端: -```bash -# 下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/detection/paddledetection/python/serving - -# 下载测试图片 -wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg - -# 请求服务,获取推理结果(如有必要,请修改脚本中的IP和端口号) -python client.py -``` diff --git a/examples/vision/segmentation/paddleseg/kunlun/python/serving/client.py b/examples/vision/segmentation/paddleseg/kunlun/python/serving/client.py deleted file mode 100644 index e652c4462..000000000 --- a/examples/vision/segmentation/paddleseg/kunlun/python/serving/client.py +++ /dev/null @@ -1,23 +0,0 @@ -import requests -import json -import cv2 -import fastdeploy as fd -from fastdeploy.serving.utils import cv2_to_base64 - -if __name__ == '__main__': - url = "http://127.0.0.1:8000/fd/ppliteseg" - headers = {"Content-Type": "application/json"} - - im = cv2.imread("cityscapes_demo.png") - data = {"data": {"image": cv2_to_base64(im)}, "parameters": {}} - - resp = requests.post(url=url, headers=headers, data=json.dumps(data)) - if resp.status_code == 200: - r_json = json.loads(resp.json()["result"]) - result = fd.vision.utils.json_to_segmentation(r_json) - vis_im = fd.vision.vis_segmentation(im, result, weight=0.5) - cv2.imwrite("visualized_result.jpg", vis_im) - print("Visualized result save in ./visualized_result.jpg") - else: - print("Error code:", resp.status_code) - print(resp.text) diff --git a/examples/vision/segmentation/paddleseg/kunlun/python/serving/server.py b/examples/vision/segmentation/paddleseg/kunlun/python/serving/server.py deleted file mode 100644 index 2ae2df09c..000000000 --- a/examples/vision/segmentation/paddleseg/kunlun/python/serving/server.py +++ /dev/null @@ -1,38 +0,0 @@ -import fastdeploy as fd -from fastdeploy.serving.server import SimpleServer -import os -import logging - -logging.getLogger().setLevel(logging.INFO) - -# Configurations -model_dir = 
'PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer' -device = 'cpu' -use_trt = False - -# Prepare model -model_file = os.path.join(model_dir, "model.pdmodel") -params_file = os.path.join(model_dir, "model.pdiparams") -config_file = os.path.join(model_dir, "deploy.yaml") - -# Setup runtime option to select hardware, backend, etc. -option = fd.RuntimeOption() -if device.lower() == 'gpu': - option.use_gpu() -if use_trt: - option.use_trt_backend() - option.set_trt_cache_file('pp_lite_seg.trt') - -# Create model instance -model_instance = fd.vision.segmentation.PaddleSegModel( - model_file=model_file, - params_file=params_file, - config_file=config_file, - runtime_option=option) - -# Create server, setup REST API -app = SimpleServer() -app.register( - task_name="fd/ppliteseg", - model_handler=fd.serving.handler.VisionModelHandler, - predictor=model_instance) diff --git a/examples/vision/segmentation/paddleseg/quantize/cpp/README.md b/examples/vision/segmentation/paddleseg/quantize/cpp/README.md deleted file mode 100755 index 9eb7c9146..000000000 --- a/examples/vision/segmentation/paddleseg/quantize/cpp/README.md +++ /dev/null @@ -1,32 +0,0 @@ -English | [简体中文](README_CN.md) -# PaddleSeg Quantitative Model C++ Deployment Example - `infer.cc` in this directory can help you quickly complete the inference acceleration of PaddleSeg quantization model deployment on CPU. - -## Deployment Preparations -### FastDeploy Environment Preparations -- 1. For the software and hardware requirements, please refer to [FastDeploy Environment Requirements](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md). -- 2. For the installation of FastDeploy Python whl package, please refer to [FastDeploy Python Installation](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md). - -### Quantized Model Preparations -- 1. You can directly use the quantized model provided by FastDeploy for deployment. -- 2. You can use [one-click automatical compression tool](../../../../../../tools/common_tools/auto_compression/) provided by FastDeploy to quantize model by yourself, and use the generated quantized model for deployment.(Note: The quantized classification model still needs the deploy.yaml file in the FP32 model folder. Self-quantized model folder does not contain this yaml file, you can copy it from the FP32 model folder to the quantized model folder.) - -## Take the Quantized PP_LiteSeg_T_STDC1_cityscapes Model as an example for Deployment -Run the following commands in this directory to compile and deploy the quantized model. FastDeploy version 0.7.0 or higher is required (x.x.x>=0.7.0). -```bash -mkdir build -cd build -# Download pre-compiled FastDeploy libraries. You can choose the appropriate version from `pre-compiled FastDeploy libraries` mentioned above. -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# Download the PP_LiteSeg_T_STDC1_cityscapes quantized model and test images provided by FastDeloy. -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_PTQ.tar -tar -xvf PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_PTQ.tar -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# Use Paddle-Inference inference quantization model on CPU. 
-./infer_demo PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_PTQ cityscapes_demo.png 1 -``` diff --git a/examples/vision/segmentation/paddleseg/quantize/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/quantize/cpp/README_CN.md deleted file mode 100644 index c4cde0b1f..000000000 --- a/examples/vision/segmentation/paddleseg/quantize/cpp/README_CN.md +++ /dev/null @@ -1,32 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg 量化模型 C++部署示例 -本目录下提供的`infer.cc`,可以帮助用户快速完成PaddleSeg量化模型在CPU上的部署推理加速. - -## 部署准备 -### FastDeploy环境准备 -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -### 量化模型准备 -- 1. 用户可以直接使用由FastDeploy提供的量化模型进行部署. -- 2. 用户可以使用FastDeploy提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署.(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的deploy.yaml文件, 自行量化的模型文件夹内不包含此yaml文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可.) - -## 以量化后的PP_LiteSeg_T_STDC1_cityscapes模型为例, 进行部署 -在本目录执行如下命令即可完成编译,以及量化模型部署.支持此模型需保证FastDeploy版本0.7.0以上(x.x.x>=0.7.0) -```bash -mkdir build -cd build -# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# 下载FastDeloy提供的PP_LiteSeg_T_STDC1_cityscapes量化模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_PTQ.tar -tar -xvf PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_PTQ.tar -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# 在CPU上使用Paddle-Inference推理量化模型 -./infer_demo PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_PTQ cityscapes_demo.png 1 -``` diff --git a/examples/vision/segmentation/paddleseg/quantize/python/README.md b/examples/vision/segmentation/paddleseg/quantize/python/README.md deleted file mode 100755 index 5607e1a80..000000000 --- a/examples/vision/segmentation/paddleseg/quantize/python/README.md +++ /dev/null @@ -1,29 +0,0 @@ -English | [简体中文](README_CN.md) -# PaddleSeg Quantitative Model Python Deployment Example - `infer.py` in this directory can help you quickly complete the inference acceleration of PaddleSeg quantization model deployment on CPU/GPU. - -## Deployment Preparations -### FastDeploy Environment Preparations -- 1. For the software and hardware requirements, please refer to [FastDeploy Environment Requirements](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) -- 2. For the installation of FastDeploy Python whl package, please refer to [FastDeploy Python Installation](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) - -### Quantized Model Preparations -- 1. You can directly use the quantized model provided by FastDeploy for deployment. -- 2. You can use [one-click automatical compression tool](../../../../../../tools/common_tools/auto_compression/) provided by FastDeploy to quantize model by yourself, and use the generated quantized model for deployment.(Note: The quantized classification model still needs the deploy.yaml file in the FP32 model folder. Self-quantized model folder does not contain this yaml file, you can copy it from the FP32 model folder to the quantized model folder.) 
- - -## Take the Quantized PP_LiteSeg_T_STDC1_cityscapes Model as an example for Deployment -```bash -# Download sample deployment code. -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd examples/vision/segmentation/paddleseg/quantize/python - -# Download the PP_LiteSeg_T_STDC1_cityscapes quantized model and test images provided by FastDeloy. -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_PTQ.tar -tar -xvf PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_PTQ.tar -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# Use Paddle-Inference inference quantization model on CPU. -python infer.py --model PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_QAT --image cityscapes_demo.png --device cpu --backend paddle - -``` diff --git a/examples/vision/segmentation/paddleseg/quantize/python/README_CN.md b/examples/vision/segmentation/paddleseg/quantize/python/README_CN.md deleted file mode 100644 index 1975a84fe..000000000 --- a/examples/vision/segmentation/paddleseg/quantize/python/README_CN.md +++ /dev/null @@ -1,29 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg 量化模型 Python部署示例 -本目录下提供的`infer.py`,可以帮助用户快速完成PaddleSeg量化模型在CPU/GPU上的部署推理加速. - -## 部署准备 -### FastDeploy环境准备 -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -### 量化模型准备 -- 1. 用户可以直接使用由FastDeploy提供的量化模型进行部署. -- 2. 用户可以使用FastDeploy提供的[一键模型自动化压缩工具](../../../../../../tools/common_tools/auto_compression/),自行进行模型量化, 并使用产出的量化模型进行部署.(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的deploy.yaml文件, 自行量化的模型文件夹内不包含此yaml文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可.) 
- - -## 以量化后的PP_LiteSeg_T_STDC1_cityscapes模型为例, 进行部署 -```bash -# 下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd examples/vision/segmentation/paddleseg/quantize/python - -# 下载FastDeloy提供的PP_LiteSeg_T_STDC1_cityscapes量化模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_PTQ.tar -tar -xvf PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_PTQ.tar -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# 在CPU上使用Paddle-Inference推理量化模型 -python infer.py --model PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_QAT --image cityscapes_demo.png --device cpu --backend paddle - -``` diff --git a/examples/vision/segmentation/paddleseg/quantize/python/infer.py b/examples/vision/segmentation/paddleseg/quantize/python/infer.py deleted file mode 100644 index 85a875c1e..000000000 --- a/examples/vision/segmentation/paddleseg/quantize/python/infer.py +++ /dev/null @@ -1,76 +0,0 @@ -import fastdeploy as fd -import cv2 -import os - - -def parse_arguments(): - import argparse - import ast - parser = argparse.ArgumentParser() - parser.add_argument( - "--model", required=True, help="Path of PaddleSeg model.") - parser.add_argument( - "--image", required=True, help="Path of test image file.") - parser.add_argument( - "--device", - type=str, - default='cpu', - help="Type of inference device, support 'cpu' or 'gpu'.") - parser.add_argument( - "--backend", - type=str, - default="default", - help="Type of inference backend, support ort/trt/paddle/openvino, default 'openvino' for cpu, 'tensorrt' for gpu" - ) - parser.add_argument( - "--device_id", - type=int, - default=0, - help="Define which GPU card used to run model.") - parser.add_argument( - "--cpu_thread_num", - type=int, - default=9, - help="Number of threads while inference on CPU.") - return parser.parse_args() - - -def build_option(args): - option = fd.RuntimeOption() - if args.device.lower() == "gpu": - option.use_gpu(0) - - option.set_cpu_thread_num(args.cpu_thread_num) - - if args.backend.lower() == "trt": - assert args.device.lower( - ) == "gpu", "TensorRT backend require inferences on device GPU." - option.use_trt_backend() - option.set_trt_cache_file(os.path.join(args.model, "model.trt")) - option.set_trt_input_shape("x", [1, 3, 256, 256], [1, 3, 1024, 1024], - [1, 3, 2048, 2048]) - elif args.backend.lower() == "ort": - option.use_ort_backend() - elif args.backend.lower() == "paddle": - option.use_paddle_infer_backend() - elif args.backend.lower() == "openvino": - assert args.device.lower( - ) == "cpu", "OpenVINO backend require inference on device CPU." 
- option.use_openvino_backend() - return option - - -args = parse_arguments() - -# 配置runtime,加载模型 -runtime_option = build_option(args) -model_file = os.path.join(args.model, "model.pdmodel") -params_file = os.path.join(args.model, "model.pdiparams") -config_file = os.path.join(args.model, "deploy.yaml") -model = fd.vision.segmentation.PaddleSegModel( - model_file, params_file, config_file, runtime_option=runtime_option) - -# 预测图片检测结果 -im = cv2.imread(args.image) -result = model.predict(im) -print(result) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README_CN.md index 7d10f82f2..b7a1be32a 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README_CN.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README_CN.md @@ -6,9 +6,28 @@ - [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) 目前FastDeploy使用RKNPU2推理PPSeg支持如下模型的部署: +- [U-Net系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/unet/README.md) +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) +- [PP-HumanSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README.md) +- [FCN系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/fcn/README.md) +- [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) -| 模型 | 参数文件大小 | 输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | -|:---------------------------------------------------------------------------------------------------------------------------------------------|:-------|:---------|:-------|:------------|:---------------| +## 准备PaddleSeg部署模型 +PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) + +**注意** +- PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 + +## 下载预训练模型 + +为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型 +- without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` +- with-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op argmax` + +开发者可直接下载使用。 + +| 模型 | 参数文件大小 | 输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:----------------|:-------|:---------|:-------|:------------|:---------------| | [Unet-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | 52MB | 1024x512 | 65.00% | 66.02% | 66.89% | | [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer.tgz) | 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | | [PP-HumanSegV1-Lite(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) | 543KB | 192x192 | 86.2% | - | - | @@ -21,14 +40,16 @@ ## 准备PaddleSeg部署模型以及转换模型 RKNPU部署模型前需要将Paddle模型转换成RKNN模型,具体步骤如下: -* Paddle动态图模型转换为ONNX模型,请参考[PaddleSeg模型导出说明](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/contrib/PP-HumanSeg) -* ONNX模型转换RKNN模型的过程,请参考[转换文档](../../../../../docs/cn/faq/rknpu2/export.md)进行转换。 +* PaddleSeg训练模型导出为推理模型,请参考[PaddleSeg模型导出说明](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md),也可以使用上表中的FastDeploy的预导出模型 +* Paddle模型转换为ONNX模型,请参考[Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) +* ONNX模型转换RKNN模型的过程,请参考[转换文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/export.md)进行转换。 +上述步骤可以可参考以下具体示例 ## 模型转换example * [PPHumanSeg](./pp_humanseg.md) ## 详细部署文档 -- 
[RKNN总体部署教程](../../../../../docs/cn/faq/rknpu2/rknpu2.md) +- [RKNN总体部署教程](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md) - [C++部署](cpp) - [Python部署](python) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README_CN.md index 309d5f26c..45bb923a0 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README_CN.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README_CN.md @@ -8,7 +8,7 @@ 1. 软硬件环境满足要求 2. 根据开发环境,下载预编译部署库或者从头编译FastDeploy仓库 -以上步骤请参考[RK2代NPU部署库编译](../../../../../../docs/cn/build_and_install/rknpu2.md)实现 +以上步骤请参考[RK2代NPU部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)实现 ## 生成基本目录文件 @@ -37,7 +37,7 @@ mkdir thirdpartys ### 编译并拷贝SDK到thirdpartys文件夹 -请参考[RK2代NPU部署库编译](../../../../../../docs/cn/build_and_install/rknpu2.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-0.0.3目录,请移动它至thirdpartys目录下. +请参考[RK2代NPU部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-x-x-x目录,请移动它至thirdpartys目录下. ### 拷贝模型文件,以及配置文件至model文件夹 在Paddle动态图模型 -> Paddle静态图模型 -> ONNX模型的过程中,将生成ONNX文件以及对应的yaml配置文件,请将配置文件存放到model文件夹内。 diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md index e0f458eb0..e212d4e2d 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md @@ -2,7 +2,7 @@ # PPHumanSeg模型部署 ## 转换模型 -下面以Portait-PP-HumanSegV2_Lite(肖像分割模型)为例子,教大家如何转换PPSeg模型到RKNN模型。 +下面以Portait-PP-HumanSegV2_Lite(肖像分割模型)为例子,教大家如何转换PaddleSeg模型到RKNN模型。 ```bash # 下载Paddle2ONNX仓库 diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README_CN.md index b897dc369..0bf8b9396 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README_CN.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README_CN.md @@ -3,9 +3,9 @@ 在部署前,需确认以下步骤 -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/rknpu2.md) +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md) -【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../../matting/) +【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../../../matting/) 本目录下提供`infer.py`快速完成PPHumanseg在RKNPU上部署的示例。执行如下脚本即可完成 @@ -32,5 +32,5 @@ RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作 - [PaddleSeg 模型介绍](..) 
- [PaddleSeg C++部署](../cpp) -- [模型预测结果说明](../../../../../../docs/api/vision_results/) -- [转换PPSeg RKNN模型文档](../README.md) +- [模型预测结果说明](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) +- [转换PaddleSeg模型至RKNN模型文档](../README.md) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rv1126/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README_CN.md index ce4cbb816..2b51362b8 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rv1126/README_CN.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README_CN.md @@ -1,12 +1,20 @@ [English](README.md) | 简体中文 -# PP-LiteSeg 量化模型在 RV1126 上的部署 -目前 FastDeploy 已经支持基于 Paddle Lite 部署 PP-LiteSeg 量化模型到 RV1126 上。 +# 在瑞芯微 RV1126 上使用 FastDeploy 部署 PaddleSeg 模型 +瑞芯微 RV1126 是一款编解码芯片,专门面相人工智能的机器视觉领域。目前,FastDeploy 支持在 RV1126 上基于 Paddle-Lite 部署 PaddleSeg 相关模型 -模型的量化和量化模型的下载请参考:[模型量化](../quantize/README.md) +## 瑞芯微 RV1126 支持的PaddleSeg模型 +由于瑞芯微 RV1126 的 NPU 仅支持 INT8 量化模型的部署,因此所支持的量化模型如下: +- [PP-LiteSeg 系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) +为了方便开发者的测试,下面提供了 PaddleSeg 导出的部分模型,开发者可直接下载使用。 + +| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | +| [PP-LiteSeg-T(STDC1)-cityscapes-without-argmax](https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz)| 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | +>> **注意**: FastDeploy 模型量化的方法及一键自动化压缩工具可以参考[模型量化](../../../quantize/README.md) ## 详细部署文档 -在 RV1126 上只支持 C++ 的部署。 +目前,瑞芯微 RV1126 上只支持C++的部署。 - [C++部署](cpp) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README_CN.md index 15c1f273e..afd185ca0 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README_CN.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README_CN.md @@ -5,22 +5,22 @@ ## 部署准备 ### FastDeploy 交叉编译环境准备 -1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](../../../../../../docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) +1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) ### 模型准备 1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 2. 用户可以使用 FastDeploy 提供的一键模型自动化压缩工具,自行进行模型量化, 并使用产出的量化模型进行部署.(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的 deploy.yaml 文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可.) -3. 模型需要异构计算,异构计算文件可以参考:[异构计算](./../../../../../../docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 +3. 模型需要异构计算,异构计算文件可以参考:[异构计算](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) ## 在 RV1126 上部署量化后的 PP-LiteSeg 分割模型 请按照以下步骤完成在 RV1126 上部署 PP-LiteSeg 量化模型: -1. 交叉编译编译 FastDeploy 库,具体请参考:[交叉编译 FastDeploy](../../../../../../docs/cn/build_and_install/rv1126.md#基于-paddlelite-的-fastdeploy-交叉编译库编译) +1. 交叉编译编译 FastDeploy 库,具体请参考:[交叉编译 FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/a311d.md#基于-paddle-lite-的-fastdeploy-交叉编译库编译) 2. 
将编译后的库拷贝到当前目录,可使用如下命令: ```bash -cp -r FastDeploy/build/fastdeploy-timvx/ FastDeploy/examples/vision/segmentation/paddleseg/rv1126/cpp +cp -r FastDeploy/build/fastdeploy-timvx/ FastDeploy/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp ``` 3. 在当前路径下载部署所需的模型和示例图片: @@ -45,7 +45,7 @@ make install 5. 基于 adb 工具部署 PP-LiteSeg 分割模型到 Rockchip RV1126,可使用如下命令: ```bash # 进入 install 目录 -cd FastDeploy/examples/vision/segmentation/paddleseg/rv1126/cpp/build/install/ +cd FastDeploy/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/build/install/ # 如下命令表示:bash run_with_adb.sh 需要运行的demo 模型路径 图片路径 设备的DEVICE_ID bash run_with_adb.sh infer_demo ppliteseg cityscapes_demo.png $DEVICE_ID ``` @@ -54,4 +54,4 @@ bash run_with_adb.sh infer_demo ppliteseg cityscapes_demo.png $DEVICE_ID -需要特别注意的是,在 RV1126 上部署的模型需要是量化后的模型,模型的量化请参考:[模型量化](../../../../../../docs/cn/quantize.md) +需要特别注意的是,在 RV1126 上部署的模型需要是量化后的模型,模型的量化请参考:[模型量化](../../../quantize/README.md) diff --git a/examples/vision/segmentation/paddleseg/sophgo/README_CN.md b/examples/vision/segmentation/paddleseg/sophgo/README_CN.md index 566691889..563507c25 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/README_CN.md +++ b/examples/vision/segmentation/paddleseg/sophgo/README_CN.md @@ -3,7 +3,15 @@ ## 支持模型列表 -- PP-LiteSeg部署模型实现来自[PaddleSeg PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/pp_liteseg/README.md) +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) + +为了方便开发者的测试,下面提供了PaddleSeg导出的部分推理模型,开发者可直接下载使用。 + +PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) + +| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | +| [PP-LiteSeg-T(STDC1)-cityscapes-without-argmax](https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz)| 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | ## 准备PP-LiteSeg部署模型以及转换模型 @@ -93,5 +101,6 @@ model_deploy.py \ ``` 最终获得可以在BM1684x上能够运行的bmodel模型pp_liteseg_1684x_f32.bmodel。如果需要进一步对模型进行加速,可以将ONNX模型转换为INT8 bmodel,具体步骤参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。 -## 其他链接 +## 快速链接 - [Cpp部署](./cpp) +- [Python部署](./python) diff --git a/examples/vision/segmentation/paddleseg/sophgo/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/sophgo/cpp/README_CN.md index 6360a2907..fbb274b15 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/cpp/README_CN.md +++ b/examples/vision/segmentation/paddleseg/sophgo/cpp/README_CN.md @@ -8,7 +8,7 @@ 1. 软硬件环境满足要求 2. 根据开发环境,从头编译FastDeploy仓库 -以上步骤请参考[SOPHGO部署库编译](../../../../../../docs/cn/build_and_install/sophgo.md)实现 +以上步骤请参考[SOPHGO部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/sophgo.md)实现 ## 生成基本目录文件 @@ -26,7 +26,7 @@ ### 编译并拷贝SDK到thirdpartys文件夹 -请参考[SOPHGO部署库编译](../../../../../../docs/cn/build_and_install/sophgo.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-0.0.3目录. +请参考[SOPHGO部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/sophgo.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-0.0.3目录. 
### 拷贝模型文件,以及配置文件至model文件夹 将Paddle模型转换为SOPHGO bmodel模型,转换步骤参考[文档](../README.md) diff --git a/examples/vision/segmentation/paddleseg/sophgo/python/README_CN.md b/examples/vision/segmentation/paddleseg/sophgo/python/README_CN.md index 9cafb1dc9..a6eb37f8f 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/python/README_CN.md +++ b/examples/vision/segmentation/paddleseg/sophgo/python/README_CN.md @@ -3,7 +3,7 @@ 在部署前,需确认以下步骤 -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/sophgo.md) +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/sophgo.md) 本目录下提供`infer.py`快速完成 pp_liteseg 在SOPHGO TPU上部署的示例。执行如下脚本即可完成 diff --git a/examples/vision/segmentation/paddleseg/web/README_CN.md b/examples/vision/segmentation/paddleseg/web/README_CN.md index 81664eee3..2847da0be 100644 --- a/examples/vision/segmentation/paddleseg/web/README_CN.md +++ b/examples/vision/segmentation/paddleseg/web/README_CN.md @@ -8,7 +8,7 @@ ## 前端部署PP-Humanseg v1模型 -PP-Humanseg v1模型web demo部署及使用参考[文档](../../../../application/js/web_demo/README.md) +PP-Humanseg v1模型web demo部署及使用参考[文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/application/js/README_CN.md) ## PP-Humanseg v1 js接口 From aef99f9b27bd0245470a21fffc713f575b32fac7 Mon Sep 17 00:00:00 2001 From: huangjianhui <852142024@qq.com> Date: Wed, 8 Feb 2023 14:33:10 +0800 Subject: [PATCH 03/41] Update README_CN.md --- examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md b/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md index 0109ac01a..e8fb39272 100644 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md @@ -1,6 +1,6 @@ -# 使用FastDeploy部署PaddleSeg模型 +# PaddleSeg模型高性能全场景部署方案-FastDeploy -FastDeploy支持在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)硬件上部署PaddleSeg模型 +PaddleSeg通过FastDeploy支持在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)硬件上部署 ## 模型版本说明 From fa551282a29cd3d45c05b3f97aac856bff79ce69 Mon Sep 17 00:00:00 2001 From: huangjianhui <852142024@qq.com> Date: Wed, 8 Feb 2023 15:01:05 +0800 Subject: [PATCH 04/41] Update README_CN.md --- examples/vision/segmentation/paddleseg/serving/README_CN.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/vision/segmentation/paddleseg/serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/README_CN.md index 0c2e5194b..7ea966f65 100644 --- a/examples/vision/segmentation/paddleseg/serving/README_CN.md +++ b/examples/vision/segmentation/paddleseg/serving/README_CN.md @@ -1,7 +1,9 @@ [English](README.md) | 简体中文 # 使用 FastDeploy 服务化部署 PaddleSeg 模型 ## FastDeploy 服务化部署介绍 -在线推理作为企业或个人线上部署模型的最后一环,是工业界必不可少的环节,其中最重要的就是服务化推理框架。FastDeploy 目前提供两种服务化部署方式:simple_serving和fastdeploy_serving。simple_serving 基于Flask框架具有简单高效的特点,可以快速验证线上部署模型的可行性。fastdeploy_serving基于Triton Inference Server框架,是一套完备且性能卓越的服务化部署框架,可用于实际生产。 +在线推理作为企业或个人线上部署模型的最后一环,是工业界必不可少的环节,其中最重要的就是服务化推理框架。FastDeploy 目前提供两种服务化部署方式: +- simple_serving和fastdeploy_serving。simple_serving 基于Flask框架具有简单高效的特点,可以快速验证线上部署模型的可行性。 +- fastdeploy_serving基于Triton Inference Server框架,是一套完备且性能卓越的服务化部署框架,可用于实际生产。 ## 详细部署文档 From 844d80ecae0f36052cb7f2aa89a8fe0b4052c2a4 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 8 Feb 2023 16:33:12 +0000 Subject: [PATCH 05/41] Update docs --- 
.../face_alignment_result_CN.md | 26 +- docs/api_docs/cpp/vision_results_cn.md | 306 ++++++++++++++++++ docs/api_docs/cpp/vision_results_en.md | 276 ++++++++++++++++ docs/cn/build_and_install/README.md | 1 + docs/cn/build_and_install/huawei_ascend.md | 14 +- .../faq/vision_result_related_problems.md} | 0 examples/vision/matting/ppmatting | 1 + examples/vision/matting/ppmatting/README.md | 42 --- .../vision/matting/ppmatting/README_CN.md | 43 --- .../matting/ppmatting/cpp/CMakeLists.txt | 14 - .../vision/matting/ppmatting/cpp/README.md | 93 ------ .../vision/matting/ppmatting/cpp/README_CN.md | 94 ------ .../vision/matting/ppmatting/cpp/infer.cc | 173 ---------- .../vision/matting/ppmatting/python/README.md | 81 ----- .../matting/ppmatting/python/README_CN.md | 81 ----- .../vision/matting/ppmatting/python/infer.py | 70 ---- .../vision/segmentation/paddleseg/README.md | 65 ++-- .../segmentation/paddleseg/README_CN.md | 23 -- .../paddleseg/amlogic/a311d/README.md | 32 +- .../paddleseg/amlogic/a311d/README_CN.md | 22 -- .../paddleseg/amlogic/a311d/cpp/README.md | 56 ++-- .../paddleseg/amlogic/a311d/cpp/README_CN.md | 59 ---- .../segmentation/paddleseg/android/README.md | 12 +- .../paddleseg/android/README_CN.md | 4 +- .../ascend/{README_CN.md => README.md} | 4 +- .../paddleseg/ascend/cpp/README.md | 100 ++---- .../paddleseg/ascend/cpp/README_CN.md | 88 ----- .../paddleseg/ascend/python/README.md | 88 ++--- .../paddleseg/ascend/python/README_CN.md | 79 ----- .../cpu-gpu/{README_CN.md => README.md} | 4 +- .../paddleseg/cpu-gpu/cpp/README.md | 105 ++---- .../paddleseg/cpu-gpu/cpp/README_CN.md | 106 ------ .../paddleseg/cpu-gpu/cpp/infer.cc | 9 +- .../paddleseg/cpu-gpu/python/README.md | 95 ++---- .../paddleseg/cpu-gpu/python/README_CN.md | 88 ----- .../paddleseg/cpu-gpu/python/infer.py | 4 + .../kunlun/{README_CN.md => README.md} | 4 +- .../paddleseg/kunlun/cpp/README.md | 101 ++---- .../paddleseg/kunlun/cpp/README_CN.md | 88 ----- .../paddleseg/kunlun/python/README.md | 89 ++--- .../paddleseg/kunlun/python/README_CN.md | 79 ----- .../segmentation/paddleseg/quantize/README.md | 51 ++- .../paddleseg/quantize/README_CN.md | 26 -- .../paddleseg/rockchip/rknpu2/README.md | 71 ++-- .../paddleseg/rockchip/rknpu2/README_CN.md | 55 ---- .../paddleseg/rockchip/rknpu2/cpp/README.md | 59 ++-- .../rockchip/rknpu2/cpp/README_CN.md | 73 ----- .../paddleseg/rockchip/rknpu2/pp_humanseg.md | 2 +- .../rockchip/rknpu2/python/README.md | 34 +- .../rockchip/rknpu2/python/README_CN.md | 36 --- .../paddleseg/rockchip/rv1126/README.md | 29 +- .../paddleseg/rockchip/rv1126/README_CN.md | 20 -- .../paddleseg/rockchip/rv1126/cpp/README.md | 54 ++-- .../rockchip/rv1126/cpp/README_CN.md | 57 ---- .../paddleseg/serving/README_CN.md | 4 +- .../segmentation/paddleseg/sophgo/README.md | 69 ++-- .../paddleseg/sophgo/README_CN.md | 106 ------ .../paddleseg/sophgo/cpp/README.md | 53 ++- .../paddleseg/sophgo/cpp/README_CN.md | 57 ---- .../paddleseg/sophgo/python/README.md | 34 +- .../paddleseg/sophgo/python/README_CN.md | 27 -- 61 files changed, 1117 insertions(+), 2519 deletions(-) create mode 100644 docs/api_docs/cpp/vision_results_cn.md create mode 100644 docs/api_docs/cpp/vision_results_en.md rename docs/{api/vision_results/faq_CN.md => cn/faq/vision_result_related_problems.md} (100%) create mode 120000 examples/vision/matting/ppmatting delete mode 100644 examples/vision/matting/ppmatting/README.md delete mode 100644 examples/vision/matting/ppmatting/README_CN.md delete mode 100644 
examples/vision/matting/ppmatting/cpp/CMakeLists.txt delete mode 100755 examples/vision/matting/ppmatting/cpp/README.md delete mode 100644 examples/vision/matting/ppmatting/cpp/README_CN.md delete mode 100755 examples/vision/matting/ppmatting/cpp/infer.cc delete mode 100755 examples/vision/matting/ppmatting/python/README.md delete mode 100644 examples/vision/matting/ppmatting/python/README_CN.md delete mode 100755 examples/vision/matting/ppmatting/python/infer.py delete mode 100644 examples/vision/segmentation/paddleseg/README_CN.md mode change 100755 => 100644 examples/vision/segmentation/paddleseg/amlogic/a311d/README.md delete mode 100644 examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md mode change 100755 => 100644 examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README.md delete mode 100644 examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README_CN.md rename examples/vision/segmentation/paddleseg/ascend/{README_CN.md => README.md} (97%) mode change 100755 => 100644 examples/vision/segmentation/paddleseg/ascend/cpp/README.md delete mode 100644 examples/vision/segmentation/paddleseg/ascend/cpp/README_CN.md mode change 100755 => 100644 examples/vision/segmentation/paddleseg/ascend/python/README.md delete mode 100644 examples/vision/segmentation/paddleseg/ascend/python/README_CN.md rename examples/vision/segmentation/paddleseg/cpu-gpu/{README_CN.md => README.md} (97%) mode change 100755 => 100644 examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README.md delete mode 100644 examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md mode change 100755 => 100644 examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md delete mode 100644 examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md rename examples/vision/segmentation/paddleseg/kunlun/{README_CN.md => README.md} (97%) mode change 100755 => 100644 examples/vision/segmentation/paddleseg/kunlun/cpp/README.md delete mode 100644 examples/vision/segmentation/paddleseg/kunlun/cpp/README_CN.md mode change 100755 => 100644 examples/vision/segmentation/paddleseg/kunlun/python/README.md delete mode 100644 examples/vision/segmentation/paddleseg/kunlun/python/README_CN.md mode change 100755 => 100644 examples/vision/segmentation/paddleseg/quantize/README.md delete mode 100644 examples/vision/segmentation/paddleseg/quantize/README_CN.md delete mode 100644 examples/vision/segmentation/paddleseg/rockchip/rknpu2/README_CN.md delete mode 100644 examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README_CN.md delete mode 100644 examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README_CN.md mode change 100755 => 100644 examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md delete mode 100644 examples/vision/segmentation/paddleseg/rockchip/rv1126/README_CN.md mode change 100755 => 100644 examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README.md delete mode 100644 examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README_CN.md delete mode 100644 examples/vision/segmentation/paddleseg/sophgo/README_CN.md delete mode 100644 examples/vision/segmentation/paddleseg/sophgo/cpp/README_CN.md delete mode 100644 examples/vision/segmentation/paddleseg/sophgo/python/README_CN.md diff --git a/docs/api/vision_results/face_alignment_result_CN.md b/docs/api/vision_results/face_alignment_result_CN.md index 1697fbfd5..ada0291fa 100644 --- a/docs/api/vision_results/face_alignment_result_CN.md +++ b/docs/api/vision_results/face_alignment_result_CN.md @@ -1,35 +1,27 @@ 
-简体中文 | [English](face_detection_result.md) -# FaceDetectionResult 人脸检测结果 +[English](face_alignment_result.md) | 简体中文 -FaceDetectionResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明人脸检测出来的目标框、人脸landmarks,目标置信度和每张人脸的landmark数量。 +# FaceAlignmentResult 人脸对齐(人脸关键点检测)结果 + +FaceAlignmentResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明人脸landmarks。 ## C++ 定义 -`fastdeploy::vision::FaceDetectionResult` +`fastdeploy::vision::FaceAlignmentResult` ```c++ -struct FaceDetectionResult { - std::vector> boxes; +struct FaceAlignmentResult { std::vector> landmarks; - std::vector scores; - int landmarks_per_face; void Clear(); std::string Str(); }; ``` -- **boxes**: 成员变量,表示单张图片检测出来的所有目标框坐标,`boxes.size()`表示框的个数,每个框以4个float数值依次表示xmin, ymin, xmax, ymax, 即左上角和右下角坐标 -- **scores**: 成员变量,表示单张图片检测出来的所有目标置信度,其元素个数与`boxes.size()`一致 -- **landmarks**: 成员变量,表示单张图片检测出来的所有人脸的关键点,其元素个数与`boxes.size()`一致 -- **landmarks_per_face**: 成员变量,表示每个人脸框中的关键点的数量。 +- **landmarks**: 成员变量,表示单张人脸图片检测出来的所有关键点 - **Clear()**: 成员函数,用于清除结构体中存储的结果 - **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) ## Python 定义 -`fastdeploy.vision.FaceDetectionResult` +`fastdeploy.vision.FaceAlignmentResult` -- **boxes**(list of list(float)): 成员变量,表示单张图片检测出来的所有目标框坐标。boxes是一个list,其每个元素为一个长度为4的list, 表示为一个框,每个框以4个float数值依次表示xmin, ymin, xmax, ymax, 即左上角和右下角坐标 -- **scores**(list of float): 成员变量,表示单张图片检测出来的所有目标置信度 -- **landmarks**(list of list(float)): 成员变量,表示单张图片检测出来的所有人脸的关键点 -- **landmarks_per_face**(int): 成员变量,表示每个人脸框中的关键点的数量。 +- **landmarks**(list of list(float)): 成员变量,表示单张人脸图片检测出来的所有关键点 diff --git a/docs/api_docs/cpp/vision_results_cn.md b/docs/api_docs/cpp/vision_results_cn.md new file mode 100644 index 000000000..73af7f9ed --- /dev/null +++ b/docs/api_docs/cpp/vision_results_cn.md @@ -0,0 +1,306 @@ +# 视觉模型预测结果说明 + +## ClassifyResult 图像分类结果 + +ClassifyResult代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像的分类结果和置信度。 + +### C++ 定义 + +`fastdeploy::vision::ClassifyResult` + +```c++ +struct ClassifyResult { + std::vector label_ids; + std::vector scores; + void Clear(); + std::string Str(); +}; +``` + +- **label_ids**: 成员变量,表示单张图片的分类结果,其个数根据在使用分类模型时传入的topk决定,例如可以返回top 5的分类结果 +- **scores**: 成员变量,表示单张图片在相应分类结果上的置信度,其个数根据在使用分类模型时传入的topk决定,例如可以返回top 5的分类置信度 +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + +## SegmentationResult 图像分割结果 + +SegmentationResult代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像中每个像素预测出来的分割类别和分割类别的概率值。 + +### C++ 定义 + +`fastdeploy::vision::SegmentationResult` + +```c++ +struct SegmentationResult { + std::vector label_map; + std::vector score_map; + std::vector shape; + bool contain_score_map = false; + void Clear(); + void Free(); + std::string Str(); +}; +``` + +- **label_map**: 成员变量,表示单张图片每个像素点的分割类别,`label_map.size()`表示图片像素点的个数 +- **score_map**: 成员变量,与label_map一一对应的所预测的分割类别概率值(当导出模型时指定`--output_op argmax`)或者经过softmax归一化化后的概率值(当导出模型时指定`--output_op softmax`或者导出模型时指定`--output_op none`同时模型初始化的时候设置模型[类成员属性](../../../examples/vision/segmentation/paddleseg/cpp/)`apply_softmax=True`) +- **shape**: 成员变量,表示输出图片的shape,为H\*W +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Free()**: 成员函数,用于清除结构体中存储的结果并释放内存 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + +## DetectionResult 目标检测结果 + +DetectionResult代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像检测出来的目标框、目标类别和目标置信度。 + +### C++ 定义 + +```c++ +fastdeploy::vision::DetectionResult +``` + +```c++ +struct DetectionResult { + std::vector> boxes; + std::vector scores; + std::vector label_ids; + std::vector masks; + bool contain_masks = false; + void Clear(); + std::string Str(); +}; +``` + +- 
**boxes**: 成员变量,表示单张图片检测出来的所有目标框坐标,`boxes.size()`表示框的个数,每个框以4个float数值依次表示xmin, ymin, xmax, ymax, 即左上角和右下角坐标 +- **scores**: 成员变量,表示单张图片检测出来的所有目标置信度,其元素个数与`boxes.size()`一致 +- **label_ids**: 成员变量,表示单张图片检测出来的所有目标类别,其元素个数与`boxes.size()`一致 +- **masks**: 成员变量,表示单张图片检测出来的所有实例mask,其元素个数及shape大小与`boxes`一致 +- **contain_masks**: 成员变量,表示检测结果中是否包含实例mask,实例分割模型的结果此项一般为true. +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + +```c++ +fastdeploy::vision::Mask +``` +```c++ +struct Mask { + std::vector data; + std::vector shape; // (H,W) ... + + void Clear(); + std::string Str(); +}; +``` +- **data**: 成员变量,表示检测到的一个mask +- **shape**: 成员变量,表示mask的shape,如 (h,w) +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + +## FaceAlignmentResult 人脸对齐(人脸关键点检测)结果 + +FaceAlignmentResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明人脸landmarks。 + +### C++ 定义 + +`fastdeploy::vision::FaceAlignmentResult` + +```c++ +struct FaceAlignmentResult { + std::vector> landmarks; + void Clear(); + std::string Str(); +}; +``` + +- **landmarks**: 成员变量,表示单张人脸图片检测出来的所有关键点 +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + +## KeyPointDetectionResult 目标检测结果 + +KeyPointDetectionResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像中目标行为的各个关键点坐标和置信度。 + +### C++ 定义 + +`fastdeploy::vision::KeyPointDetectionResult` + +```c++ +struct KeyPointDetectionResult { + std::vector> keypoints; + std::vector scores; + int num_joints = -1; + void Clear(); + std::string Str(); +}; +``` + +- **keypoints**: 成员变量,表示识别到的目标行为的关键点坐标。 + `keypoints.size()= N * J` + - `N`:图片中的目标数量 + - `J`:num_joints(一个目标的关键点数量) +- **scores**: 成员变量,表示识别到的目标行为的关键点坐标的置信度。 + `scores.size()= N * J` + - `N`:图片中的目标数量 + - `J`:num_joints(一个目标的关键点数量) +- **num_joints**: 成员变量,一个目标的关键点数量 +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + + +## FaceRecognitionResult 人脸识别结果 + +FaceRecognitionResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明人脸识别模型对图像特征的embedding。 +### C++ 定义 + +`fastdeploy::vision::FaceRecognitionResult` + +```c++ +struct FaceRecognitionResult { + std::vector embedding; + void Clear(); + std::string Str(); +}; +``` + +- **embedding**: 成员变量,表示人脸识别模型最终的提取的特征embedding,可以用来计算人脸之间的特征相似度。 +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + + + +## MattingResult 抠图结果 + +MattingResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明模型预测的alpha透明度的值,预测的前景等。 + +### C++ 定义 + +`fastdeploy::vision::MattingResult` + +```c++ +struct MattingResult { + std::vector alpha; + std::vector foreground; + std::vector shape; + bool contain_foreground = false; + void Clear(); + std::string Str(); +}; +``` + +- **alpha**: 是一维向量,为预测的alpha透明度的值,值域为[0.,1.],长度为hxw,h,w为输入图像的高和宽 +- **foreground**: 是一维向量,为预测的前景,值域为[0.,255.],长度为hxwxc,h,w为输入图像的高和宽,c一般为3,foreground不是一定有的,只有模型本身预测了前景,这个属性才会有效 +- **contain_foreground**: 表示预测的结果是否包含前景 +- **shape**: 表示输出结果的shape,当contain_foreground为false,shape只包含(h,w),当contain_foreground为true,shape包含(h,w,c), c一般为3 +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + +## OCRResult OCR预测结果 + +OCRResult代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像检测和识别出来的文本框,文本框方向分类,以及文本框内的文本内容 + +### C++ 定义 + +```c++ +fastdeploy::vision::OCRResult +``` + +```c++ +struct OCRResult { + std::vector> boxes; + std::vector text; + std::vector rec_scores; + std::vector cls_scores; + std::vector cls_labels; + ResultType type = ResultType::OCR; + void Clear(); + std::string Str(); +}; +``` + +- **boxes**: 
成员变量,表示单张图片检测出来的所有目标框坐标,`boxes.size()`表示单张图内检测出的框的个数,每个框以8个int数值依次表示框的4个坐标点,顺序为左下,右下,右上,左上 +- **text**: 成员变量,表示多个文本框内被识别出来的文本内容,其元素个数与`boxes.size()`一致 +- **rec_scores**: 成员变量,表示文本框内识别出来的文本的置信度,其元素个数与`boxes.size()`一致 +- **cls_scores**: 成员变量,表示文本框的分类结果的置信度,其元素个数与`boxes.size()`一致 +- **cls_labels**: 成员变量,表示文本框的方向分类类别,其元素个数与`boxes.size()`一致 +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + + +## FaceDetectionResult 人脸检测结果 + +FaceDetectionResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明人脸检测出来的目标框、人脸landmarks,目标置信度和每张人脸的landmark数量。 + +### C++ 定义 + +`fastdeploy::vision::FaceDetectionResult` + +```c++ +struct FaceDetectionResult { + std::vector> boxes; + std::vector> landmarks; + std::vector scores; + int landmarks_per_face; + void Clear(); + std::string Str(); +}; +``` + +- **boxes**: 成员变量,表示单张图片检测出来的所有目标框坐标,`boxes.size()`表示框的个数,每个框以4个float数值依次表示xmin, ymin, xmax, ymax, 即左上角和右下角坐标 +- **scores**: 成员变量,表示单张图片检测出来的所有目标置信度,其元素个数与`boxes.size()`一致 +- **landmarks**: 成员变量,表示单张图片检测出来的所有人脸的关键点,其元素个数与`boxes.size()`一致 +- **landmarks_per_face**: 成员变量,表示每个人脸框中的关键点的数量。 +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + +## HeadPoseResult 头部姿态结果 + +HeadPoseResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明头部姿态结果。 + +### C++ 定义 + +`fastdeploy::vision::HeadPoseResult` + +```c++ +struct HeadPoseResult { + std::vector euler_angles; + void Clear(); + std::string Str(); +}; +``` + +- **euler_angles**: 成员变量,表示单张人脸图片预测的欧拉角,存放的顺序是(yaw, pitch, roll), yaw 代表水平转角,pitch 代表垂直角,roll 代表翻滚角,值域都为 [-90,+90]度 +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) + + +API:`fastdeploy.vision.HeadPoseResult`, 该结果返回: +- **euler_angles**(list of float): 成员变量,表示单张人脸图片预测的欧拉角,存放的顺序是(yaw, pitch, roll), yaw 代表水平转角,pitch 代表垂直角,roll 代表翻滚角,值域都为 [-90, +90]度 + +## MOTResult 多目标跟踪结果 + +MOTResult代码定义在`fastdeploy/vision/common/result.h`中,用于表明多目标跟踪中的检测出来的目标框、目标跟踪id、目标类别和目标置信度。 + +### C++ 定义 + +```c++ +fastdeploy::vision::MOTResult +``` + +```c++ +struct MOTResult{ + // left top right bottom + std::vector> boxes; + std::vector ids; + std::vector scores; + std::vector class_ids; + void Clear(); + std::string Str(); +}; +``` + +- **boxes**: 成员变量,表示单帧画面中检测出来的所有目标框坐标,`boxes.size()`表示框的个数,每个框以4个float数值依次表示xmin, ymin, xmax, ymax, 即左上角和右下角坐标 +- **ids**: 成员变量,表示单帧画面中所有目标的id,其元素个数与`boxes.size()`一致 +- **scores**: 成员变量,表示单帧画面检测出来的所有目标置信度,其元素个数与`boxes.size()`一致 +- **class_ids**: 成员变量,表示单帧画面出来的所有目标类别,其元素个数与`boxes.size()`一致 +- **Clear()**: 成员函数,用于清除结构体中存储的结果 +- **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) diff --git a/docs/api_docs/cpp/vision_results_en.md b/docs/api_docs/cpp/vision_results_en.md new file mode 100644 index 000000000..122e90293 --- /dev/null +++ b/docs/api_docs/cpp/vision_results_en.md @@ -0,0 +1,276 @@ +# Description of Vision Results + +本文档的中文版本参考[视觉模型预测结果说明](./vision_results_cn.md) + +## Image Classification Result + +The ClassifyResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate the classification result and confidence level of the image. + +### C++ Definition + +`fastdeploy::vision::ClassifyResult` + +```c++ +struct ClassifyResult { + std::vector label_ids; + std::vector scores; + void Clear(); + std::string Str(); +}; +``` + +- **label_ids**: Member variable which indicates the classification results of a single image. Its number is determined by the topk passed in when using the classification model, e.g. it can return the top 5 classification results. 
+- **scores**: Member variable which indicates the confidence level of a single image on the corresponding classification result. Its number is determined by the topk passed in when using the classification model, e.g. it can return the top 5 classification confidence level. +- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). + + +## Segmentation Result + +The SegmentationResult code is defined in `fastdeploy/vision/common/result.h`, indicating the segmentation category and the segmentation category probability predicted in each pixel in the image. + +### C++ Definition + +``fastdeploy::vision::SegmentationResult`` + +```c++ +struct SegmentationResult { + std::vector label_map; + std::vector score_map; + std::vector shape; + bool contain_score_map = false; + void Clear(); + std::string Str(); +}; +``` + +- **label_map**: Member variable which indicates the segmentation category of each pixel in a single image. `label_map.size()` indicates the number of pixel points of a image. +- **score_map**: Member variable which indicates the predicted segmentation category probability value (specified as `--output_op argmax` when export) corresponding to label_map, or the probability value normalized by softmax (specified as `--output_op softmax` when export, or as `--output_op when exporting the model). none` when export while setting the [class member attribute](../../../examples/vision/segmentation/paddleseg/cpp/) as `apply_softmax=True` during model initialization). +- **shape**: Member variable which indicates the shape of the output image as H\*W. +- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). + +## Target Detection Result + +The DetectionResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate the target frame, target class and target confidence level detected in the image. + +### C++ Definition + +```c++ +fastdeploy::vision::DetectionResult +``` + +```c++ +struct DetectionResult { + std::vector> boxes; + std::vector scores; + std::vector label_ids; + std::vector masks; + bool contain_masks = false; + void Clear(); + std::string Str(); +}; +``` + +- **boxes**: Member variable which indicates the coordinates of all detected target boxes in a single image. `boxes.size()` indicates the number of boxes, each box is represented by 4 float values in order of xmin, ymin, xmax, ymax, i.e. the coordinates of the top left and bottom right corner. +- **scores**: Member variable which indicates the confidence level of all targets detected in a single image, where the number of elements is the same as `boxes.size()`. +- **label_ids**: Member variable which indicates all target categories detected in a single image, where the number of elements is the same as `boxes.size()`. +- **masks**: Member variable which indicates all detected instance masks of a single image, where the number of elements and the shape size are the same as `boxes`. +- **contain_masks**: Member variable which indicates whether the detected result contains instance masks, which is generally true for the instance segmentation model. +- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). 
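
A minimal usage sketch (illustrative only, not part of `result.h`): once a detection model's `Predict` call has filled the structure, the fields above can be read directly. The 0.5 score threshold below is just an example value.

```c++
// Illustrative sketch: filter and print a DetectionResult that a model's
// Predict() call has already filled in. Assumes the FastDeploy headers are
// on the include path; the 0.5 threshold is an arbitrary example value.
#include <cstdio>
#include "fastdeploy/vision/common/result.h"

void PrintDetections(const fastdeploy::vision::DetectionResult& res,
                     float score_threshold = 0.5f) {
  for (size_t i = 0; i < res.boxes.size(); ++i) {
    if (res.scores[i] < score_threshold) continue;  // skip low-confidence boxes
    // Each box is stored as {xmin, ymin, xmax, ymax}.
    std::printf("label=%d score=%.3f box=[%.1f, %.1f, %.1f, %.1f]\n",
                res.label_ids[i], res.scores[i], res.boxes[i][0],
                res.boxes[i][1], res.boxes[i][2], res.boxes[i][3]);
  }
}
```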
+ +```c++ +fastdeploy::vision::Mask +``` +```c++ +struct Mask { + std::vector data; + std::vector shape; // (H,W) ... + + void Clear(); + std::string Str(); +}; +``` +- **data**: Member variable which indicates a detected mask. +- **shape**: Member variable which indicates the shape of the mask, e.g. (h,w). +- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). + + +## Face Detection Result + +The FaceDetectionResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate the target frames, face landmarks, target confidence and the number of landmark per face. + +### C++ Definition + +``fastdeploy::vision::FaceDetectionResult`` + +```c++ +struct FaceDetectionResult { + std::vector> boxes; + std::vector> landmarks; + std::vector scores; + int landmarks_per_face; + void Clear(); + std::string Str(); +}; +``` + +- **boxes**: Member variable which indicates the coordinates of all detected target boxes in a single image. `boxes.size()` indicates the number of boxes, each box is represented by 4 float values in order of xmin, ymin, xmax, ymax, i.e. the coordinates of the top left and bottom right corner. +- **scores**: Member variable which indicates the confidence level of all targets detected in a single image, where the number of elements is the same as `boxes.size()`. +- **landmarks**: Member variable which indicates the keypoints of all faces detected in a single image, where the number of elements is the same as `boxes.size()`. +- **landmarks_per_face**: Member variable which indicates the number of keypoints in each face box. +- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). + + +## Keypoint Detection Result + +The KeyPointDetectionResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate the coordinates and confidence level of each keypoint of the target's behavior in the image. + +### C++ Definition + +``fastdeploy::vision::KeyPointDetectionResult`` + +```c++ +struct KeyPointDetectionResult { + std::vector> keypoints; + std::vector scores; + int num_joints = -1; + void Clear(); + std::string Str(); +}; +``` + +- **keypoints**: Member variable which indicates the coordinates of the identified target behavior keypoint. + ` keypoints.size() = N * J`: + - `N`: the number of targets in the image + - `J`: num_joints (the number of keypoints of a target) +- **scores**: Member variable which indicates the confidence level of the keypoint coordinates of the identified target behavior. + `scores.size() = N * J`: + - `N`: the number of targets in the picture + - `J`:num_joints (the number of keypoints of a target) +- **num_joints**: Member variable which indicates the number of keypoints of a target. +- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). + + +## Face Recognition Result + +The FaceRecognitionResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate the image features embedding in the face recognition model. 
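
Since the structure (defined below) only carries a raw feature vector, comparing two faces typically reduces to measuring the similarity of their `embedding` fields. A minimal cosine-similarity sketch, assuming both results come from the same model so the embeddings have equal length:

```c++
// Illustrative sketch: cosine similarity between the embeddings of two
// FaceRecognitionResult objects, assuming both were produced by the same
// model (i.e. the embedding vectors have the same length).
#include <cmath>
#include "fastdeploy/vision/common/result.h"

float CosineSimilarity(const fastdeploy::vision::FaceRecognitionResult& a,
                       const fastdeploy::vision::FaceRecognitionResult& b) {
  float dot = 0.0f, norm_a = 0.0f, norm_b = 0.0f;
  for (size_t i = 0; i < a.embedding.size(); ++i) {
    dot += a.embedding[i] * b.embedding[i];
    norm_a += a.embedding[i] * a.embedding[i];
    norm_b += b.embedding[i] * b.embedding[i];
  }
  // The small epsilon guards against division by zero for empty vectors.
  return dot / (std::sqrt(norm_a) * std::sqrt(norm_b) + 1e-6f);
}
```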
+### C++ Definition + +`fastdeploy::vision::FaceRecognitionResult` + +```c++ +struct FaceRecognitionResult { + std::vector embedding; + void Clear(); + std::string Str(); +}; +``` + +- **embedding**: Member variable which indicates the final extracted feature embedding of the face recognition model, and can be used to calculate the facial feature similarity. +- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). + +## Matting Result + +The MattingResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate the predicted value of alpha transparency predicted and the predicted foreground, etc. + +### C++ Definition + +``fastdeploy::vision::MattingResult`'' + +```c++ +struct MattingResult { + std::vector alpha; + std::vector foreground; + std::vector shape; + bool contain_foreground = false; + void Clear(); + std::string Str(); +}; +``` + +- **alpha**: It is a one-dimensional vector, indicating the predicted value of alpha transparency. The value range is [0.,1.], and the length is hxw, in which h,w represent the height and the width of the input image seperately. +- **foreground**: It is a one-dimensional vector, indicating the predicted foreground. The value range is [0.,255.], and the length is hxwxc, in which h,w represent the height and the width of the input image, and c is generally 3. This vector is valid only when the model itself predicts the foreground. +- **contain_foreground**: Used to indicate whether the result contains foreground. +- **shape**: Used to indicate the shape of the output. When contain_foreground is false, the shape only contains (h,w), while when contain_foreground is true, the shape contains (h,w,c), in which c is generally 3. +- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). + + +## OCR prediction result + +The OCRResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate the text box detected in the image, text box orientation classification, and the text content. + +### C++ Definition + +```c++ +fastdeploy::vision::OCRResult +``` + +```c++ +struct OCRResult { + std::vector> boxes; + std::vector text; + std::vector rec_scores; + std::vector cls_scores; + std::vector cls_labels; + ResultType type = ResultType::OCR; + void Clear(); + std::string Str(); +}; +``` + +- **boxes**: Member variable which indicates the coordinates of all detected target boxes in a single image. `boxes.size()` indicates the number of detected boxes. Each box is represented by 8 int values to indicate the 4 coordinates of the box, in the order of lower left, lower right, upper right, upper left. +- **text**: Member variable which indicates the content of the recognized text in multiple text boxes, where the element number is the same as `boxes.size()`. +- **rec_scores**: Member variable which indicates the confidence level of the recognized text, where the element number is the same as `boxes.size()`. +- **cls_scores**: Member variable which indicates the confidence level of the classification result of the text box, where the element number is the same as `boxes.size()`. +- **cls_labels**: Member variable which indicates the directional category of the textbox, where the element number is the same as `boxes.size()`. 
+- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). + + +## Face Alignment Result + +The FaceAlignmentResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate face landmarks. + +### C++ Definition + +`fastdeploy::vision::FaceAlignmentResult` + +```c++ +struct FaceAlignmentResult { + std::vector> landmarks; + void Clear(); + std::string Str(); +}; +``` + +- **landmarks**: Member variable which indicates all the key points detected in a single face image. +- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). + + +## Head Pose Result + +The HeadPoseResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate the head pose result. + +### C++ Definition + +``fastdeploy::vision::HeadPoseResult`'' + +```c++ +struct HeadPoseResult { + std::vector euler_angles; + void Clear(); + std::string Str(); +}; +``` + +- **euler_angles**: Member variable which indicates the Euler angles predicted for a single face image, stored in the order (yaw, pitch, roll), with yaw representing the horizontal turn angle, pitch representing the vertical angle, and roll representing the roll angle, all with a value range of [-90,+90]. +- **Clear()**: Member function used to clear the results stored in the structure. +- **Str()**: Member function used to output the information in the structure as string (for Debug). diff --git a/docs/cn/build_and_install/README.md b/docs/cn/build_and_install/README.md index 92f575d8d..c6d5a23bc 100755 --- a/docs/cn/build_and_install/README.md +++ b/docs/cn/build_and_install/README.md @@ -4,6 +4,7 @@ ## FastDeploy预编译库安装 - [FastDeploy预编译库下载安装](download_prebuilt_libraries.md) +>> **注意**:FastDeploy目前只提供部分环境的预编译库,其他环境需要参考下方文档自行编译 ## 自行编译安装 - [NVIDIA GPU部署环境](gpu.md) diff --git a/docs/cn/build_and_install/huawei_ascend.md b/docs/cn/build_and_install/huawei_ascend.md index 3741027e2..980a4ba47 100644 --- a/docs/cn/build_and_install/huawei_ascend.md +++ b/docs/cn/build_and_install/huawei_ascend.md @@ -1,5 +1,17 @@ # 华为昇腾NPU 部署环境编译准备 +## 导航目录 + +* [简介以及编译选项](#简介以及编译选项) +* [华为昇腾环境准备](#一华为昇腾环境准备) +* [编译环境搭建](#二编译环境搭建) +* [基于 Paddle Lite 的 C++ FastDeploy 库编译](#三基于-paddle-lite-的-c-fastdeploy-库编译) +* [基于 Paddle Lite 的 Python FastDeploy 库编译](#四基于-paddle-lite-的-python-fastdeploy-库编译) +* [昇腾部署时开启FlyCV](#五昇腾部署时开启flycv) +* [昇腾部署Demo参考](#六昇腾部署demo参考) + +## 简介以及编译选项 + FastDeploy基于 Paddle-Lite 后端, 支持在华为昇腾NPU上进行部署推理。 更多详细的信息请参考:[Paddle Lite部署示例](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/demo_guides/huawei_ascend_npu.md)。 @@ -114,7 +126,7 @@ python setup.py bdist_wheel ## 五.昇腾部署时开启FlyCV [FlyCV](https://github.com/PaddlePaddle/FlyCV) 是一款高性能计算机图像处理库, 针对ARM架构做了很多优化, 相比其他图像处理库性能更为出色. FastDeploy现在已经集成FlyCV, 用户可以在支持的硬件平台上使用FlyCV, 实现模型端到端推理性能的加速. -模型端到端推理中, 预处理和后处理阶段为CPU计算, 当用户使用ARM CPU + 昇腾的硬件平台时, 我们推荐用户使用FlyCV, 可以实现端到端的推理性能加速, 详见[FLyCV使用文档](./boost_cv_by_flycv.md). +模型端到端推理中, 预处理和后处理阶段为CPU计算, 当用户使用ARM CPU + 昇腾的硬件平台时, 我们推荐用户使用FlyCV, 可以实现端到端的推理性能加速, 详见[FLyCV使用文档](../faq/boost_cv_by_flycv.md). 
## 六.昇腾部署Demo参考 diff --git a/docs/api/vision_results/faq_CN.md b/docs/cn/faq/vision_result_related_problems.md similarity index 100% rename from docs/api/vision_results/faq_CN.md rename to docs/cn/faq/vision_result_related_problems.md diff --git a/examples/vision/matting/ppmatting b/examples/vision/matting/ppmatting new file mode 120000 index 000000000..c89893416 --- /dev/null +++ b/examples/vision/matting/ppmatting @@ -0,0 +1 @@ +/huangjianhui/doc/FastDeploy/examples/vision/segmentation/ppmatting/ \ No newline at end of file diff --git a/examples/vision/matting/ppmatting/README.md b/examples/vision/matting/ppmatting/README.md deleted file mode 100644 index a2cbdc346..000000000 --- a/examples/vision/matting/ppmatting/README.md +++ /dev/null @@ -1,42 +0,0 @@ -English | [简体中文](README_CN.md) -# PP-Matting Model Deployment - -## Model Description - -- [PP-Matting Release/2.6](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) - -## List of Supported Models - -Now FastDeploy supports the deployment of the following models - -- [PP-Matting models](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) -- [PP-HumanMatting models](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) -- [ModNet models](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) - - -## Export Deployment Model - -Before deployment, PP-Matting needs to be exported into the deployment model. Refer to [Export Model](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) for more information. (Tips: You need to set the `--input_shape` parameter of the export script when exporting PP-Matting and PP-HumanMatting models) - - -## Download Pre-trained Models - -For developers' testing, models exported by PP-Matting are provided below. Developers can download and use them directly. - -The accuracy metric is sourced from the model description in PP-Matting. (Accuracy data are not provided) Refer to the introduction in PP-Matting for more details. 
- -| Model | Parameter Size | Accuracy | Note | -|:---------------------------------------------------------------- |:----- |:----- | :------ | -| [PP-Matting-512](https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz) | 106MB | - | -| [PP-Matting-1024](https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-1024.tgz) | 106MB | - | -| [PP-HumanMatting](https://bj.bcebos.com/paddlehub/fastdeploy/PPHumanMatting.tgz) | 247MB | - | -| [Modnet-ResNet50_vd](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_ResNet50_vd.tgz) | 355MB | - | -| [Modnet-MobileNetV2](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_MobileNetV2.tgz) | 28MB | - | -| [Modnet-HRNet_w18](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_HRNet_w18.tgz) | 51MB | - | - - - -## Detailed Deployment Tutorials - -- [Python Deployment](python) -- [C++ Deployment](cpp) diff --git a/examples/vision/matting/ppmatting/README_CN.md b/examples/vision/matting/ppmatting/README_CN.md deleted file mode 100644 index a1c9801aa..000000000 --- a/examples/vision/matting/ppmatting/README_CN.md +++ /dev/null @@ -1,43 +0,0 @@ -[English](README.md) | 简体中文 -# PP-Matting模型部署 - -## 模型版本说明 - -- [PP-Matting Release/2.6](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) - -## 支持模型列表 - -目前FastDeploy支持如下模型的部署 - -- [PP-Matting系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) -- [PP-HumanMatting系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) -- [ModNet系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) - - -## 导出部署模型 - -在部署前,需要先将PP-Matting导出成部署模型,导出步骤参考文档[导出模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting)(Tips:导出PP-Matting系列模型和PP-HumanMatting系列模型需要设置导出脚本的`--input_shape`参数) - - -## 下载预训练模型 - -为了方便开发者的测试,下面提供了PP-Matting导出的各系列模型,开发者可直接下载使用。 - -其中精度指标来源于PP-Matting中对各模型的介绍(未提供精度数据),详情各参考PP-Matting中的说明。 - - -| 模型 | 参数大小 | 精度 | 备注 | -|:---------------------------------------------------------------- |:----- |:----- | :------ | -| [PP-Matting-512](https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz) | 106MB | - | -| [PP-Matting-1024](https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-1024.tgz) | 106MB | - | -| [PP-HumanMatting](https://bj.bcebos.com/paddlehub/fastdeploy/PPHumanMatting.tgz) | 247MB | - | -| [Modnet-ResNet50_vd](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_ResNet50_vd.tgz) | 355MB | - | -| [Modnet-MobileNetV2](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_MobileNetV2.tgz) | 28MB | - | -| [Modnet-HRNet_w18](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_HRNet_w18.tgz) | 51MB | - | - - - -## 详细部署文档 - -- [Python部署](python) -- [C++部署](cpp) diff --git a/examples/vision/matting/ppmatting/cpp/CMakeLists.txt b/examples/vision/matting/ppmatting/cpp/CMakeLists.txt deleted file mode 100644 index 93540a7e8..000000000 --- a/examples/vision/matting/ppmatting/cpp/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -PROJECT(infer_demo C CXX) -CMAKE_MINIMUM_REQUIRED (VERSION 3.10) - -# 指定下载解压后的fastdeploy库路径 -option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") - -include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) - -# 添加FastDeploy依赖头文件 -include_directories(${FASTDEPLOY_INCS}) - -add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) -# 添加FastDeploy库依赖 -target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/matting/ppmatting/cpp/README.md b/examples/vision/matting/ppmatting/cpp/README.md deleted file mode 100755 index fbe2c5fae..000000000 --- 
a/examples/vision/matting/ppmatting/cpp/README.md +++ /dev/null @@ -1,93 +0,0 @@ -English | [简体中文](README_CN.md) -# PP-Matting C++ Deployment Example - -This directory provides examples that `infer.cc` fast finishes the deployment of PP-Matting on CPU/GPU and GPU accelerated by TensorRT. -Before deployment, two steps require confirmation - -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) -- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) - -Taking the PP-Matting inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required to support this model. - -```bash -mkdir build -cd build -# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# Download PP-Matting model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz -tar -xvf PP-Matting-512.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg - - -# CPU inference -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 0 -# GPU inference -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 1 -# TensorRT inference on GPU -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 2 -# kunlunxin XPU inference -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 3 -``` - -The visualized result after running is as follows -
- - - - -
- -The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to: -- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/en/faq/use_sdk_on_windows.md) - -## PP-Matting C++ Interface - -### PPMatting Class - -```c++ -fastdeploy::vision::matting::PPMatting( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PP-Matting model loading and initialization, among which model_file is the exported Paddle model format. - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -#### Predict Function - -> ```c++ -> PPMatting::Predict(cv::Mat* im, MattingResult* result) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **im**: Input images in HWC or BGR format -> > * **result**: The segmentation result, including the predicted label of the segmentation and the corresponding probability of the label. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of SegmentationResult - -### Class Member Variable -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - - -- [Model Description](../../) -- [Python Deployment](../python) -- [Vision Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/en/faq/how_to_change_backend.md) diff --git a/examples/vision/matting/ppmatting/cpp/README_CN.md b/examples/vision/matting/ppmatting/cpp/README_CN.md deleted file mode 100644 index 38e2e592a..000000000 --- a/examples/vision/matting/ppmatting/cpp/README_CN.md +++ /dev/null @@ -1,94 +0,0 @@ -[English](README.md) | 简体中文 -# PP-Matting C++部署示例 - -本目录下提供`infer.cc`快速完成PP-Matting在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。 - -在部署前,需确认以下两个步骤 - -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -以Linux上 PP-Matting 推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本0.7.0以上(x.x.x>=0.7.0) - -```bash -mkdir build -cd build -# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# 下载PP-Matting模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz -tar -xvf PP-Matting-512.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg - - -# CPU推理 -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 0 -# GPU推理 -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 1 -# GPU上TensorRT推理 -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 2 -# 昆仑芯XPU推理 -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 3 -``` - -运行完成可视化结果如下图所示 -
- - - - -
- -以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: -- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) - -## PP-Matting C++接口 - -### PPMatting类 - -```c++ -fastdeploy::vision::matting::PPMatting( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PP-Matting模型加载和初始化,其中model_file为导出的Paddle模型格式。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **config_file**(str): 推理部署配置文件 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 - -#### Predict函数 - -> ```c++ -> PPMatting::Predict(cv::Mat* im, MattingResult* result) -> ``` -> -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, MattingResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/) - -### 类成员属性 -#### 预处理参数 -用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 - - -- [模型介绍](../../) -- [Python部署](../python) -- [视觉模型预测结果](../../../../../docs/api/vision_results/) -- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/matting/ppmatting/cpp/infer.cc b/examples/vision/matting/ppmatting/cpp/infer.cc deleted file mode 100755 index 3b8309044..000000000 --- a/examples/vision/matting/ppmatting/cpp/infer.cc +++ /dev/null @@ -1,173 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "fastdeploy/vision.h" - -#ifdef WIN32 -const char sep = '\\'; -#else -const char sep = '/'; -#endif - -void CpuInfer(const std::string& model_dir, const std::string& image_file, - const std::string& background_file) { - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "deploy.yaml"; - auto option = fastdeploy::RuntimeOption(); - option.UseCpu(); - auto model = fastdeploy::vision::matting::PPMatting(model_file, params_file, - config_file, option); - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return; - } - - auto im = cv::imread(image_file); - cv::Mat bg = cv::imread(background_file); - fastdeploy::vision::MattingResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Failed to predict." 
<< std::endl; - return; - } - auto vis_im = fastdeploy::vision::VisMatting(im, res); - auto vis_im_with_bg = - fastdeploy::vision::SwapBackground(im, bg, res); - cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); - std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " - "and ./visualized_result_fg.jpg" - << std::endl; -} - -void KunlunXinInfer(const std::string& model_dir, const std::string& image_file, - const std::string& background_file) { - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "deploy.yaml"; - auto option = fastdeploy::RuntimeOption(); - option.UseKunlunXin(); - auto model = fastdeploy::vision::matting::PPMatting(model_file, params_file, - config_file, option); - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return; - } - - auto im = cv::imread(image_file); - cv::Mat bg = cv::imread(background_file); - fastdeploy::vision::MattingResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return; - } - auto vis_im = fastdeploy::vision::VisMatting(im, res); - auto vis_im_with_bg = - fastdeploy::vision::SwapBackground(im, bg, res); - cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); - std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " - "and ./visualized_result_fg.jpg" - << std::endl; -} - -void GpuInfer(const std::string& model_dir, const std::string& image_file, - const std::string& background_file) { - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "deploy.yaml"; - - auto option = fastdeploy::RuntimeOption(); - option.UseGpu(); - option.UsePaddleInferBackend(); - auto model = fastdeploy::vision::matting::PPMatting(model_file, params_file, - config_file, option); - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return; - } - - auto im = cv::imread(image_file); - cv::Mat bg = cv::imread(background_file); - fastdeploy::vision::MattingResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return; - } - auto vis_im = fastdeploy::vision::VisMatting(im, res); - auto vis_im_with_bg = - fastdeploy::vision::SwapBackground(im, bg, res); - cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); - std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " - "and ./visualized_result_fg.jpg" - << std::endl; -} - -void TrtInfer(const std::string& model_dir, const std::string& image_file, - const std::string& background_file) { - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "deploy.yaml"; - - auto option = fastdeploy::RuntimeOption(); - option.UseGpu(); - option.UseTrtBackend(); - option.SetTrtInputShape("img", {1, 3, 512, 512}); - auto model = fastdeploy::vision::matting::PPMatting(model_file, params_file, - config_file, option); - if (!model.Initialized()) { - std::cerr << "Failed to initialize." 
<< std::endl; - return; - } - - auto im = cv::imread(image_file); - cv::Mat bg = cv::imread(background_file); - fastdeploy::vision::MattingResult res; - if (!model.Predict(&im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return; - } - auto vis_im = fastdeploy::vision::VisMatting(im, res); - auto vis_im_with_bg = - fastdeploy::vision::SwapBackground(im, bg, res); - cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); - std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " - "and ./visualized_result_fg.jpg" - << std::endl; -} - -int main(int argc, char* argv[]) { - if (argc < 5) { - std::cout - << "Usage: infer_demo path/to/model_dir path/to/image run_option, " - "e.g ./infer_model ./PP-Matting-512 ./test.jpg ./test_bg.jpg 0" - << std::endl; - std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " - "with gpu; 2: run with gpu and use tensorrt backend, 3: run with kunlunxin." - << std::endl; - return -1; - } - if (std::atoi(argv[4]) == 0) { - CpuInfer(argv[1], argv[2], argv[3]); - } else if (std::atoi(argv[4]) == 1) { - GpuInfer(argv[1], argv[2], argv[3]); - } else if (std::atoi(argv[4]) == 2) { - TrtInfer(argv[1], argv[2], argv[3]); - } else if (std::atoi(argv[4]) == 3) { - KunlunXinInfer(argv[1], argv[2], argv[3]); - } - return 0; -} diff --git a/examples/vision/matting/ppmatting/python/README.md b/examples/vision/matting/ppmatting/python/README.md deleted file mode 100755 index 09fe0cdc2..000000000 --- a/examples/vision/matting/ppmatting/python/README.md +++ /dev/null @@ -1,81 +0,0 @@ -English | [简体中文](README_CN.md) -# PP-Matting Python Deployment Example - -Before deployment, two steps require confirmation - -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) -- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) - -This directory provides examples that `infer.py` fast finishes the deployment of PP-Matting on CPU/GPU and GPU accelerated by TensorRT. The script is as follows -```bash -# Download the deployment example code -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/matting/ppmatting/python - -# Download PP-Matting model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz -tar -xvf PP-Matting-512.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg -# CPU inference -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device cpu -# GPU inference -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu -# TensorRT inference on GPU(Attention: It is somewhat time-consuming for the operation of model serialization when running TensorRT inference for the first time. Please be patient.) -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu --use_trt True -# kunlunxin XPU inference -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device kunlunxin -``` - -The visualized result after running is as follows -
- - - - -
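The PP-Matting examples in this part of the patch only consume the prediction through the built-in `VisMatting`/`SwapBackground` helpers. As a complement, the short C++ sketch below shows one way to read the raw matte out of a `MattingResult` directly; the `alpha` and `shape` fields are taken from the FastDeploy `MattingResult` structure described in the vision-results docs linked above, and the helper name `AlphaToMask` is illustrative only, not part of this patch.

```c++
// Illustrative sketch: turn MattingResult::alpha into an 8-bit mask so the
// matte can be post-processed without the built-in visualizers.
// Assumes shape == {height, width} and alpha values in [0, 1].
#include <opencv2/opencv.hpp>
#include "fastdeploy/vision.h"

cv::Mat AlphaToMask(const fastdeploy::vision::MattingResult& res) {
  int h = static_cast<int>(res.shape[0]);
  int w = static_cast<int>(res.shape[1]);
  cv::Mat mask(h, w, CV_8UC1);
  for (int i = 0; i < h * w; ++i) {
    // scale the float alpha value to an 8-bit gray level
    mask.data[i] = static_cast<uchar>(res.alpha[i] * 255.0f);
  }
  return mask;
}
```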
-## PP-Matting Python Interface - -```python -fd.vision.matting.PPMatting(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) -``` - -PP-Matting model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) for more information - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -### predict function - -> ```python -> PPMatting.predict(input_image) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **input_image**(np.ndarray): Input data in HWC or BGR format - -> **Return** -> -> > Return `fastdeploy.vision.MattingResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of the structure. - -### Class Member Variable - -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - - - -## Other Documents - -- [PP-Matting Model Description](..) -- [PP-Matting C++ Deployment](../cpp) -- [Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/en/faq/how_to_change_backend.md) diff --git a/examples/vision/matting/ppmatting/python/README_CN.md b/examples/vision/matting/ppmatting/python/README_CN.md deleted file mode 100644 index cdfd7d378..000000000 --- a/examples/vision/matting/ppmatting/python/README_CN.md +++ /dev/null @@ -1,81 +0,0 @@ -[English](README.md) | 简体中文 -# PP-Matting Python部署示例 - -在部署前,需确认以下两个步骤 - -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -本目录下提供`infer.py`快速完成PP-Matting在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成 - -```bash -#下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/matting/ppmatting/python - -# 下载PP-Matting模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz -tar -xvf PP-Matting-512.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg -# CPU推理 -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device cpu -# GPU推理 -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu -# GPU上使用TensorRT推理 (注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待) -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu --use_trt True -# 昆仑芯XPU推理 -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device kunlunxin -``` - -运行完成可视化结果如下图所示 -
- - - - -
-## PP-Matting Python接口 - -```python -fd.vision.matting.PPMatting(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) -``` - -PP-Matting模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **config_file**(str): 推理部署配置文件 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 - -### predict函数 - -> ```python -> PPMatting.predict(input_image) -> ``` -> -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 - -> **返回** -> -> > 返回`fastdeploy.vision.MattingResult`结构体,结构体说明参考文档[视觉模型预测结果](../../../../../docs/api/vision_results/) - -### 类成员属性 -#### 预处理参数 -用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 - - - -## 其它文档 - -- [PP-Matting 模型介绍](..) -- [PP-Matting C++部署](../cpp) -- [模型预测结果说明](../../../../../docs/api/vision_results/) -- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/matting/ppmatting/python/infer.py b/examples/vision/matting/ppmatting/python/infer.py deleted file mode 100755 index 512d0ca86..000000000 --- a/examples/vision/matting/ppmatting/python/infer.py +++ /dev/null @@ -1,70 +0,0 @@ -import fastdeploy as fd -import cv2 -import os - - -def parse_arguments(): - import argparse - import ast - parser = argparse.ArgumentParser() - parser.add_argument( - "--model", required=True, help="Path of PaddleSeg model.") - parser.add_argument( - "--image", type=str, required=True, help="Path of test image file.") - parser.add_argument( - "--bg", - type=str, - required=True, - default=None, - help="Path of test background image file.") - parser.add_argument( - "--device", - type=str, - default='cpu', - help="Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.") - parser.add_argument( - "--use_trt", - type=ast.literal_eval, - default=False, - help="Wether to use tensorrt.") - return parser.parse_args() - - -def build_option(args): - option = fd.RuntimeOption() - if args.device.lower() == "gpu": - option.use_gpu() - option.use_paddle_infer_backend() - - if args.use_trt: - option.use_trt_backend() - option.set_trt_input_shape("img", [1, 3, 512, 512]) - - if args.device.lower() == "kunlunxin": - option.use_kunlunxin() - return option - - -args = parse_arguments() - -# 配置runtime,加载模型 -runtime_option = build_option(args) -model_file = os.path.join(args.model, "model.pdmodel") -params_file = os.path.join(args.model, "model.pdiparams") -config_file = os.path.join(args.model, "deploy.yaml") -model = fd.vision.matting.PPMatting( - model_file, params_file, config_file, runtime_option=runtime_option) - -# 预测图片抠图结果 -im = cv2.imread(args.image) -bg = cv2.imread(args.bg) -result = model.predict(im) -print(result) -# 可视化结果 -vis_im = fd.vision.vis_matting(im, result) -vis_im_with_bg = fd.vision.swap_background(im, bg, result) -cv2.imwrite("visualized_result_fg.jpg", vis_im) -cv2.imwrite("visualized_result_replaced_bg.jpg", vis_im_with_bg) -print( - "Visualized result save in ./visualized_result_replaced_bg.jpg and ./visualized_result_fg.jpg" -) diff --git a/examples/vision/segmentation/paddleseg/README.md b/examples/vision/segmentation/paddleseg/README.md index de578cb22..a1947ae56 100644 --- a/examples/vision/segmentation/paddleseg/README.md +++ b/examples/vision/segmentation/paddleseg/README.md @@ -1,49 +1,32 @@ -English | 
[简体中文](README_CN.md) -# PaddleSeg Model Deployment +# PaddleSeg高性能全场景模型部署方案——FastDeploy -## Model Description +## FastDeploy介绍 -- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) +[FastDeploy](https://github.com/PaddlePaddle/FastDeploy)是一款全场景、易用灵活、极致高效的AI推理部署工具,使用FastDeploy可以简单高效的在10+款硬件上对PaddleSeg模型进行快速部署 -FastDeploy currently supports the deployment of the following models +## 支持如下的硬件部署 -- [U-Net models](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/unet/README.md) -- [PP-LiteSeg models](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/pp_liteseg/README.md) -- [PP-HumanSeg models](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/contrib/PP-HumanSeg/README.md) -- [FCN models](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/fcn/README.md) -- [DeepLabV3 models](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/deeplabv3/README.md) - -【Attention】For **PP-Matting**、**PP-HumanMatting** and **ModNet** deployment, please refer to [Matting Model Deployment](../../matting) - -## Prepare PaddleSeg Deployment Model - -For the export of the PaddleSeg model, refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) for more information - -**Attention** -- The exported PaddleSeg model contains three files, including `model.pdmodel`、`model.pdiparams` and `deploy.yaml`. FastDeploy will get the pre-processing information for inference from yaml files. - -## Download Pre-trained Model - -For developers' testing, part of the PaddleSeg exported models are provided below. -- without-argmax export mode: **Not specified**`--input_shape`,**specified**`--output_op none` -- with-argmax export mode:**Not specified**`--input_shape`,**specified**`--output_op argmax` - -Developers can download directly. 
+| 硬件支持列表 | | | | +|:----- | :-- | :-- | :-- | +| [NVIDIA GPU](cpu-gpu) | [X86 CPU](cpu-gpu)| [飞腾CPU](cpu-gpu) | [ARM CPU](cpu-gpu) | +| [Intel GPU(独立显卡/集成显卡)](cpu-gpu) | [昆仑](kunlun) | [昇腾](ascend) | [瑞芯微](rockchip) | +| [晶晨](amlogic) | [算能](sophgo) | -| Model | Parameter Size | Input Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | -|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | -| [Unet-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_with_argmax_infer.tgz) \| [Unet-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | 52MB | 1024x512 | 65.00% | 66.02% | 66.89% | -| [PP-LiteSeg-B(STDC2)-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz) \| [PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz) | 31MB | 1024x512 | 79.04% | 79.52% | 79.85% | -|[PP-HumanSegV1-Lite-with-argmax(General Portrait Segmentation Model)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV1_Lite_with_argmax_infer.tgz) \| [PP-HumanSegV1-Lite-without-argmax(General Portrait Segmentation Model)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) | 543KB | 192x192 | 86.2% | - | - | -|[PP-HumanSegV2-Lite-with-argmax(General Portrait Segmentation Model)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Lite-without-argmax(General Portrait Segmentation Model)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_infer.tgz) | 12MB | 192x192 | 92.52% | - | - | -| [PP-HumanSegV2-Mobile-with-argmax(General Portrait Segmentation Model)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Mobile-without-argmax(General Portrait Segmentation Model)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) | 29MB | 192x192 | 93.13% | - | - | -|[PP-HumanSegV1-Server-with-argmax(General Portrait Segmentation Model)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_with_argmax_infer.tgz) \| [PP-HumanSegV1-Server-without-argmax(General Portrait Segmentation Model)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) | 103MB | 512x512 | 96.47% | - | - | -| [Portait-PP-HumanSegV2-Lite-with-argmax(Portrait Segmentation Model)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer.tgz) \| [Portait-PP-HumanSegV2-Lite-without-argmax(Portrait Segmentation Model)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) | 3.6M | 256x144 | 96.63% | - | - | -| [FCN-HRNet-W18-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_with_argmax_infer.tgz) \| [FCN-HRNet-W18-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz)(GPU inference for ONNXRuntime is not supported now) | 37MB | 1024x512 | 78.97% | 79.49% | 79.74% | -| [Deeplabv3-ResNet101-OS8-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer.tgz) \| 
[Deeplabv3-ResNet101-OS8-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) | 150MB | 1024x512 | 79.90% | 80.22% | 80.47% | +## 更多部署方式 -## Detailed Deployment Tutorials +- [Android ARM CPU部署](android) +- [服务化Serving部署](serving) +- [web部署](web) +- [模型自动化压缩工具](quantize) -- [Python Deployment](python) -- [C++ Deployment](cpp) + +## 常见问题 + +遇到问题可查看常见问题集合文档或搜索FastDeploy issues,链接如下: + +[常见问题集合](https://github.com/PaddlePaddle/FastDeploy/tree/develop/docs/cn/faq) + +[FastDeploy issues](https://github.com/PaddlePaddle/FastDeploy/issues) + +若以上方式都无法解决问题,欢迎给FastDeploy提交新的[issue](https://github.com/PaddlePaddle/FastDeploy/issues) diff --git a/examples/vision/segmentation/paddleseg/README_CN.md b/examples/vision/segmentation/paddleseg/README_CN.md deleted file mode 100644 index 865c6c4aa..000000000 --- a/examples/vision/segmentation/paddleseg/README_CN.md +++ /dev/null @@ -1,23 +0,0 @@ -# 使用FastDeploy部署PaddleSeg模型 - -## FastDeploy介绍 - -FastDeploy是一款全场景、易用灵活、极致高效的AI推理部署工具,使用FastDeploy可以简单高效的在10+款硬件上对PaddleSeg模型进行快速部署 - -## 详细文档 - -- [NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)](cpu-gpu) -- [昆仑](kunlun) -- [升腾](ascend) -- [瑞芯微](rockchip) -- [晶晨](amlogic) -- [算能](sophgo) -- [Android ARM CPU部署](android) -- [服务化Serving部署](serving) -- [模型自动化压缩工具](quantize) -- [web部署](web) - -## 常见问题 -遇到问题可查看常见问题集合文档或搜索 FastDeploy issues,链接如下。若都无法解决,欢迎给 FastDeploy 提交新的issue -[常见问题集合](https://github.com/PaddlePaddle/FastDeploy/tree/develop/docs/cn/faq) -[FastDeploy issues](https://github.com/PaddlePaddle/FastDeploy/issues) diff --git a/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md old mode 100755 new mode 100644 index db3e18110..9f856deb4 --- a/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md +++ b/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md @@ -1,12 +1,30 @@ -English | [简体中文](README_CN.md) -# Deployment of PP-LiteSeg Quantification Model on A311D -Now FastDeploy allows deploying PP-LiteSeg quantization model to A311D based on Paddle Lite. +[English](README.md) | 简体中文 -For model quantization and download of quantized models, refer to [Model Quantization](../quantize/README.md) +# 在晶晨A311D上使用FastDeploy部署PaddleSeg模型 +晶晨A311D是一款先进的AI应用处理器。FastDeploy支持在A311D上基于Paddle-Lite部署PaddleSeg相关模型 +## 晶晨A311D支持的PaddleSeg模型 +目前所支持的PaddleSeg模型如下: +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) -## Detailed Deployment Tutorials +## 预导出的推理模型 +为了方便开发者的测试,下面提供了PaddleSeg导出的部分量化后的推理模型,开发者可直接下载使用。 -Only C++ deployment is supported on A311D. +| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | +| [PP-LiteSeg-T(STDC1)-cityscapes-without-argmax](https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz)| 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | +**注意** +- PaddleSeg量化模型包含`model.pdmodel`、`model.pdiparams`、`deploy.yaml`和`subgraph.txt`四个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息,subgraph.txt是为了异构计算而存储的配置文件 +- 若以上列表中无满足要求的模型,可参考下方教程自行导出适配A311D的模型 -- [C++ deployment](cpp) +## PaddleSeg动态图模型导出为A311D支持的INT8模型 +模型导出分为以下两步 +1. PaddleSeg训练的动态图模型导出为推理静态图模型,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) +晶晨A311D仅支持INT8 +2. 
将推理模型量化压缩为INT8模型,FastDeploy模型量化的方法及一键自动化压缩工具可以参考[模型量化](../../../quantize/README.md) + +## 详细部署文档 + +目前,A311D上只支持C++的部署。 + +- [C++部署](cpp) diff --git a/examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md deleted file mode 100644 index 3537dfef1..000000000 --- a/examples/vision/segmentation/paddleseg/amlogic/a311d/README_CN.md +++ /dev/null @@ -1,22 +0,0 @@ -[English](README.md) | 简体中文 -# 在晶晨A311D上使用FastDeploy部署PaddleSeg模型 -晶晨A311D是一款先进的AI应用处理器。目前,FastDeploy支持在A311D上基于Paddle-Lite部署PaddleSeg相关模型 - -## 晶晨A311D支持的PaddleSeg模型 -由于晶晨A311D的NPU仅支持INT8量化模型的部署,因此所支持的量化模型如下: -- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) - -为了方便开发者的测试,下面提供了PaddleSeg导出的部分推理模型,开发者可直接下载使用。 - -PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) - -| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | -|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | -| [PP-LiteSeg-T(STDC1)-cityscapes-without-argmax](https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz)| 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | ->> **注意**: FastDeploy模型量化的方法及一键自动化压缩工具可以参考[模型量化](../../../quantize/README.md) - -## 详细部署文档 - -目前,A311D上只支持C++的部署。 - -- [C++部署](cpp) diff --git a/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README.md old mode 100755 new mode 100644 index 0b8467ae4..57a71f86f --- a/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README.md @@ -1,31 +1,28 @@ -English | [简体中文](README_CN.md) -# PP-LiteSeg Quantitative Model C++ Deployment Example +[English](README.md) | 简体中文 +# PP-LiteSeg 量化模型 C++ 部署示例 -`infer.cc` in this directory can help you quickly complete the inference acceleration of PP-LiteSeg quantization model deployment on A311D. +本目录下提供的 `infer.cc`,可以帮助用户快速完成 PP-LiteSeg 量化模型在晶晨 A311D 上的部署推理加速。 -## Deployment Preparations -### FastDeploy Cross-compile Environment Preparations -1. For the software and hardware environment, and the cross-compile environment, please refer to [FastDeploy Cross-compile environment](../../../../../../docs/en/build_and_install/a311d.md#Cross-compilation-environment-construction). +## 部署准备 +### FastDeploy 交叉编译环境准备 +软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装) -### Model Preparations -1. You can directly use the quantized model provided by FastDeploy for deployment. -2. You can use one-click automatical compression tool provided by FastDeploy to quantize model by yourself, and use the generated quantized model for deployment.(Note: The quantized classification model still needs the deploy.yaml file in the FP32 model folder. Self-quantized model folder does not contain this yaml file, you can copy it from the FP32 model folder to the quantized model folder.) -3. The model requires heterogeneous computation. Please refer to: [Heterogeneous Computation](./../../../../../../docs/en/faq/heterogeneous_computing_on_timvx_npu.md). Since the model is already provided, you can test the heterogeneous file we provide first to verify whether the accuracy meets the requirements. +### 模型准备 +1. 用户可以直接使用由[FastDeploy 提供的量化模型](../README_CN.md#晶晨a311d支持的paddleseg模型)进行部署。 +2. 
若FastDeploy没有提供满足要求的量化模型,用户可以参考[PaddleSeg动态图模型导出为A311D支持的INT8模型](../README_CN.md#paddleseg动态图模型导出为a311d支持的int8模型)自行导出或训练量化模型 +3. 若上述导出或训练的模型出现精度下降或者报错,则需要使用异构计算,使得模型算子部分跑在A311D的ARM CPU上进行调试以及精度验证,其中异构计算所需的文件是subgraph.txt。具体关于异构计算可参考:[异构计算](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md)。 -For more information, please refer to [Model Quantization](../../quantize/README.md) +## 在 A311D 上部署量化后的 PP-LiteSeg 分割模型 +请按照以下步骤完成在 A311D 上部署 PP-LiteSeg 量化模型: -## Deploying the Quantized PP-LiteSeg Segmentation model on A311D -Please follow these steps to complete the deployment of the PP-LiteSeg quantization model on A311D. -1. Cross-compile the FastDeploy library as described in [Cross-compile FastDeploy](../../../../../../docs/en/build_and_install/a311d.md#FastDeploy-cross-compilation-library-compilation-based-on-Paddle-Lite) - -2. Copy the compiled library to the current directory. You can run this line: +1. 将编译后的库拷贝到当前目录,可使用如下命令: ```bash -cp -r FastDeploy/build/fastdeploy-timvx/ FastDeploy/examples/vision/segmentation/paddleseg/a311d/cpp +cp -r FastDeploy/build/fastdeploy-timvx/ path/to/paddleseg/amlogic/a311d/cpp ``` -3. Download the model and example images required for deployment in current path. +2. 在当前路径下载部署所需的模型和示例图片: ```bash -cd FastDeploy/examples/vision/segmentation/paddleseg/a311d/cpp +cd path/to/paddleseg/amlogic/a311d/cpp mkdir models && mkdir images wget https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz tar -xvf ppliteseg.tar.gz @@ -34,26 +31,29 @@ wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png cp -r cityscapes_demo.png images ``` -4. Compile the deployment example. You can run the following lines: +3. 编译部署示例,可使入如下命令: ```bash -cd FastDeploy/examples/vision/segmentation/paddleseg/a311d/cpp +cd path/to/paddleseg/amlogic/a311d/cpp mkdir build && cd build cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/../fastdeploy-timvx/toolchain.cmake -DFASTDEPLOY_INSTALL_DIR=${PWD}/../fastdeploy-timvx -DTARGET_ABI=arm64 .. make -j8 make install -# After success, an install folder will be created with a running demo and libraries required for deployment. +# 成功编译之后,会生成 install 文件夹,里面有一个运行 demo 和部署所需的库 ``` -5. Deploy the PP-LiteSeg segmentation model to A311D based on adb. You can run the following lines: +4. 基于 adb 工具部署 PP-LiteSeg 分割模型到晶晨 A311D,可使用如下命令: ```bash -# Go to the install directory. -cd FastDeploy/examples/vision/segmentation/paddleseg/a311d/cpp/build/install/ -# The following line represents: bash run_with_adb.sh, demo needed to run, model path, image path, DEVICE ID. +# 进入 install 目录 +cd path/to/paddleseg/amlogic/a311d/cpp/build/install/ +cp ../../run_with_adb.sh . +# 如下命令表示:bash run_with_adb.sh 需要运行的demo 模型路径 图片路径 设备的DEVICE_ID bash run_with_adb.sh infer_demo ppliteseg cityscapes_demo.png $DEVICE_ID ``` -The output is: +部署成功后运行结果如下: -Please note that the model deployed on A311D needs to be quantized. You can refer to [Model Quantization](../../../../../../docs/en/quantize.md). 
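For reference, the C++ side of this A311D demo reduces to pointing the runtime at Paddle Lite's TIM-VX backend and at the `subgraph.txt` heterogeneous-computing file shipped with the quantized model. The sketch below assumes the FastDeploy `RuntimeOption` API (`UseTimVX()`, `SetLiteSubgraphPartitionPath()`); verify the exact calls against the `infer.cc` in this directory.

```c++
// Minimal sketch, assuming the FastDeploy RuntimeOption API: build the
// runtime option the A311D demo needs, i.e. the TIM-VX path for the INT8
// NPU model plus the subgraph.txt file that keeps unsupported operators
// on the ARM CPU (heterogeneous computing).
#include <string>
#include "fastdeploy/vision.h"

fastdeploy::RuntimeOption BuildA311DOption(const std::string& model_dir) {
  fastdeploy::RuntimeOption option;
  option.UseTimVX();  // run the quantized model on the NPU via Paddle Lite + TIM-VX
  // Operators listed in subgraph.txt fall back to the CPU for accuracy/debugging.
  option.SetLiteSubgraphPartitionPath(model_dir + "/subgraph.txt");
  return option;
}
```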
+## 快速链接 +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](../../) diff --git a/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README_CN.md deleted file mode 100644 index 4780a9110..000000000 --- a/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/README_CN.md +++ /dev/null @@ -1,59 +0,0 @@ -[English](README.md) | 简体中文 -# PP-LiteSeg 量化模型 C++ 部署示例 - -本目录下提供的 `infer.cc`,可以帮助用户快速完成 PP-LiteSeg 量化模型在 晶晨A311D 上的部署推理加速。 - -## 部署准备 -### FastDeploy 交叉编译环境准备 -1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/a311d.md#交叉编译环境搭建) - -### 模型准备 -1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 -2. 用户可以使用 FastDeploy 提供的一键模型自动化压缩工具,自行进行模型量化, 并使用产出的量化模型进行部署.(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的 deploy.yaml 文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可.) -3. 模型需要异构计算,异构计算文件可以参考:[异构计算](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 - -更多量化相关相关信息可查阅[模型量化](../../../quantize/README.md) - -## 在 A311D 上部署量化后的 PP-LiteSeg 分割模型 -请按照以下步骤完成在 A311D 上部署 PP-LiteSeg 量化模型: -1. 交叉编译编译 FastDeploy 库,具体请参考:[交叉编译 FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/a311d.md#基于-paddle-lite-的-fastdeploy-交叉编译库编译) - -2. 将编译后的库拷贝到当前目录,可使用如下命令: -```bash -cp -r FastDeploy/build/fastdeploy-timvx/ FastDeploy/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp -``` - -3. 在当前路径下载部署所需的模型和示例图片: -```bash -cd FastDeploy/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp -mkdir models && mkdir images -wget https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz -tar -xvf ppliteseg.tar.gz -cp -r ppliteseg models -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png -cp -r cityscapes_demo.png images -``` - -4. 编译部署示例,可使入如下命令: -```bash -cd FastDeploy/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp -mkdir build && cd build -cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/../fastdeploy-timvx/toolchain.cmake -DFASTDEPLOY_INSTALL_DIR=${PWD}/../fastdeploy-timvx -DTARGET_ABI=arm64 .. -make -j8 -make install -# 成功编译之后,会生成 install 文件夹,里面有一个运行 demo 和部署所需的库 -``` - -5. 基于 adb 工具部署 PP-LiteSeg 分割模型到晶晨 A311D,可使用如下命令: -```bash -# 进入 install 目录 -cd FastDeploy/examples/vision/segmentation/paddleseg/amlogic/a311d/cpp/build/install/ -# 如下命令表示:bash run_with_adb.sh 需要运行的demo 模型路径 图片路径 设备的DEVICE_ID -bash run_with_adb.sh infer_demo ppliteseg cityscapes_demo.png $DEVICE_ID -``` - -部署成功后运行结果如下: - - - -需要特别注意的是,在 A311D 上部署的模型需要是量化后的模型,模型的量化请参考:[模型量化](../../../quantize/README.md) diff --git a/examples/vision/segmentation/paddleseg/android/README.md b/examples/vision/segmentation/paddleseg/android/README.md index 0d845f2a0..ab06e1868 100644 --- a/examples/vision/segmentation/paddleseg/android/README.md +++ b/examples/vision/segmentation/paddleseg/android/README.md @@ -19,7 +19,7 @@ For real-time portrait segmentation on Android, this demo has good ease of use a

> **Notes:** ->> If you encounter an NDK configuration error during importing, compiling or running the program, please open ` File > Project Structure > SDK Location` and change `Andriod SDK location` to your locally configured SDK path. +>> If you encounter an NDK configuration error during importing, compiling or running the program, please open ` File > Project Structure > SDK Location` and change `Andriod SDK location` to your locally configured SDK path. 4. Click the Run button to automatically compile the APP and install it to your phone. (The process will automatically download the pre-compiled FastDeploy Android library and model files, internet connection required.) The success interface is as follows. Figure 1: Install APP on phone; Figure 2: The opening interface, it will automatically recognize the person in the picture and draw the mask; Figure 3: APP setting options, click setting in the upper right corner, and you can set different options. @@ -159,11 +159,11 @@ model.init(modelFile, paramFile, configFile, option); For details, please refer to [SegmentationMainActivity](./app/src/main/java/com/baidu/paddle/fastdeploy/app/examples/segmentation/SegmentationMainActivity.java). ## Replace FastDeploy SDK and model - Steps to replace the FastDeploy prediction libraries and model are very simple. The location of the prediction library is `app/libs/fastdeploy-android-sdk-xxx.aar`, where `xxx` indicates the version of the prediction library you are currently using. The location of the model is, `app/src/main/assets/models/portrait_pp_humansegv2_lite_256x144_inference_model`. + Steps to replace the FastDeploy prediction libraries and model are very simple. The location of the prediction library is `app/libs/fastdeploy-android-sdk-xxx.aar`, where `xxx` indicates the version of the prediction library you are currently using. The location of the model is, `app/src/main/assets/models/portrait_pp_humansegv2_lite_256x144_inference_model`. - Replace FastDeploy Android SDK: Download or compile the latest FastDeploy Android SDK, unzip it and put it in the `app/libs` directory. For details please refer to: - - [Use FastDeploy Java SDK on Android](../../../../../java/android/) + - [Use FastDeploy Java SDK on Android](https://github.com/PaddlePaddle/FastDeploy/tree/develop/java/android) -- Steps for replacing the PaddleSeg model. +- Steps for replacing the PaddleSeg model. - Put your PaddleSeg model in `app/src/main/assets/models`; - Modify the model path in `app/src/main/res/values/strings.xml`, such as: ```xml @@ -173,5 +173,5 @@ For details, please refer to [SegmentationMainActivity](./app/src/main/java/com/ ## Other Documenets If you are interested in more FastDeploy Java API documents and how to access the FastDeploy C++ API via JNI, you can refer to the following: -- [Use FastDeploy Java SDK on Android](../../../../../java/android/) -- [Use FastDeploy C++ SDK on Android](../../../../../docs/en/faq/use_cpp_sdk_on_android.md) +- [Use FastDeploy Java SDK on Android](https://github.com/PaddlePaddle/FastDeploy/tree/develop/java/android) +- [Use FastDeploy C++ SDK on Android](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_cpp_sdk_on_android.md) diff --git a/examples/vision/segmentation/paddleseg/android/README_CN.md b/examples/vision/segmentation/paddleseg/android/README_CN.md index 442947009..30938b697 100644 --- a/examples/vision/segmentation/paddleseg/android/README_CN.md +++ b/examples/vision/segmentation/paddleseg/android/README_CN.md @@ -10,7 +10,7 @@ ## 部署步骤 -1. 
目标检测 PaddleSeg Demo 位于 `fastdeploy/examples/vision/segmentation/paddleseg/android` 目录 +1. 目标检测 PaddleSeg Demo 位于 `path/to/paddleseg/android` 目录 2. 用 Android Studio 打开 paddleseg/android 工程 3. 手机连接电脑,打开 USB 调试和文件传输模式,并在 Android Studio 上连接自己的手机设备(手机需要开启允许从 USB 安装软件权限) @@ -161,7 +161,7 @@ model.init(modelFile, paramFile, configFile, option); ## 替换 FastDeploy SDK和模型 替换FastDeploy预测库和模型的步骤非常简单。预测库所在的位置为 `app/libs/fastdeploy-android-sdk-xxx.aar`,其中 `xxx` 表示当前您使用的预测库版本号。模型所在的位置为,`app/src/main/assets/models/portrait_pp_humansegv2_lite_256x144_inference_model`。 - 替换FastDeploy Android SDK: 下载或编译最新的FastDeploy Android SDK,解压缩后放在 `app/libs` 目录下;详细配置文档可参考: - - [在 Android 中使用 FastDeploy Java SDK](../../../../../java/android/) + - [在 Android 中使用 FastDeploy Java SDK](https://github.com/PaddlePaddle/FastDeploy/tree/develop/java/android) - 替换PaddleSeg模型的步骤: - 将您的PaddleSeg模型放在 `app/src/main/assets/models` 目录下; diff --git a/examples/vision/segmentation/paddleseg/ascend/README_CN.md b/examples/vision/segmentation/paddleseg/ascend/README.md similarity index 97% rename from examples/vision/segmentation/paddleseg/ascend/README_CN.md rename to examples/vision/segmentation/paddleseg/ascend/README.md index fb29615fe..475d8817b 100644 --- a/examples/vision/segmentation/paddleseg/ascend/README_CN.md +++ b/examples/vision/segmentation/paddleseg/ascend/README.md @@ -14,7 +14,7 @@ FastDeploy支持在华为昇腾上部署PaddleSeg模型 - [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) - [SegFormer系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/segformer/README.md) ->>**注意** 若需要在华为昇腾上部署**PP-Matting**、**PP-HumanMatting**请从[Matting模型部署](../../matting/)下载对应模型,部署过程与此文档一致 +>>**注意** 若需要在华为昇腾上部署**PP-Matting**、**PP-HumanMatting**请从[Matting模型部署](../../ppmatting/)下载对应模型,部署过程与此文档一致 ## 准备PaddleSeg部署模型 PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) @@ -22,7 +22,7 @@ PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.co **注意** - PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 -## 下载预训练模型 +## 预导出的推理模型 为了方便开发者的测试,下面提供了PaddleSeg导出的部分推理模型模型 - without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` diff --git a/examples/vision/segmentation/paddleseg/ascend/cpp/README.md b/examples/vision/segmentation/paddleseg/ascend/cpp/README.md old mode 100755 new mode 100644 index bcccdd1cb..cfa01c663 --- a/examples/vision/segmentation/paddleseg/ascend/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/ascend/cpp/README.md @@ -1,96 +1,38 @@ -English | [简体中文](README_CN.md) -# PaddleSeg C++ Deployment Example +[English](README.md) | 简体中文 +# PaddleSeg C++部署示例 -This directory provides examples that `infer.cc` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. +本目录下提供`infer.cc`快速完成PP-LiteSeg在华为昇腾上部署的示例。 -Before deployment, two steps require confirmation +## 华为昇腾NPU编译FastDeploy环境准备 +在部署前,需自行编译基于华为昇腾NPU的预测库,参考文档[华为昇腾NPU部署环境编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装) -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. Download the precompiled deployment library and samples code according to your development environment. 
Refer to [FastDeploy Precompiled Library](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) - -Taking the inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 1.0.0 or above (x.x.x>=1.0.0) is required to support this model. +>>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../ppmatting/)下载 ```bash +#下载部署示例代码 +cd path/to/paddleseg/ascend/cpp + mkdir build cd build -# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +# 使用编译完成的FastDeploy库编译infer_demo +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-ascend make -j -# Download Unet model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz -tar -xvf Unet_cityscapes_without_argmax_infer.tgz +# 下载PP-LiteSeg模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# CPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 0 -# GPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 1 -# TensorRT inference on GPU -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 2 -# kunlunxin XPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 3 +# 华为昇腾推理 +./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png ``` -The visualized result after running is as follows +运行完成可视化结果如下图所示
-The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to: -- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md) - -## PaddleSeg C++ Interface - -### PaddleSeg Class - -```c++ -fastdeploy::vision::segmentation::PaddleSegModel( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PaddleSegModel model loading and initialization, among which model_file is the exported Paddle model format. - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -#### Predict Function - -> ```c++ -> PaddleSegModel::Predict(cv::Mat* im, DetectionResult* result) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **im**: Input images in HWC or BGR format -> > * **result**: The segmentation result, including the predicted label of the segmentation and the corresponding probability of the label. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of SegmentationResult - -### Class Member Variable -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - -> > * **is_vertical_screen**(bool): For PP-HumanSeg models, the input image is portrait, height greater than a width, by setting this parameter to`true` - -#### Post-processing Parameter -> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported. Set this parameter to `true` to normalize the probability result (score_map) of the predicted output segmentation label (label_map) - -- [Model Description](../../) -- [Python Deployment](../python) -- [Vision Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) +## 快速链接 +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](../../) +- [Python部署](../python) diff --git a/examples/vision/segmentation/paddleseg/ascend/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/ascend/cpp/README_CN.md deleted file mode 100644 index 38692dc26..000000000 --- a/examples/vision/segmentation/paddleseg/ascend/cpp/README_CN.md +++ /dev/null @@ -1,88 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg C++部署示例 - -本目录下提供`infer.cc`快速完成PP-LiteSeg在华为昇腾上部署的示例。 - -在部署前,需自行编译基于华为昇腾NPU的预测库,参考文档[华为昇腾NPU部署环境编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/huawei_ascend.md) - ->>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../matting)下载 - -```bash -#下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/ascend/cpp - -mkdir build -cd build -# 使用编译完成的FastDeploy库编译infer_demo -cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-ascend -make -j - -# 下载PP-LiteSeg模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# 华为昇腾推理 -./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png -``` - -运行完成可视化结果如下图所示 -
- -
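Neither the new Ascend README nor the file deleted here shows the C++ call sequence itself, so a hedged end-to-end sketch follows. `UseAscend()`, the pointer form of `Predict()`, and the `label_map`/`shape` fields follow the FastDeploy C++ API as documented elsewhere in this patch; treat the exact signatures as assumptions to verify against the shipped `infer.cc`.

```c++
// Hedged sketch of the Ascend flow described above: select the Ascend
// backend, run PaddleSegModel, and read the per-pixel labels out of the
// SegmentationResult.
#include <iostream>
#include <string>
#include "fastdeploy/vision.h"

int main() {
  std::string dir = "PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer";

  fastdeploy::RuntimeOption option;
  option.UseAscend();  // run on a Huawei Ascend NPU

  fastdeploy::vision::segmentation::PaddleSegModel model(
      dir + "/model.pdmodel", dir + "/model.pdiparams", dir + "/deploy.yaml",
      option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize." << std::endl;
    return -1;
  }

  cv::Mat im = cv::imread("cityscapes_demo.png");
  fastdeploy::vision::SegmentationResult res;
  if (!model.Predict(&im, &res)) {  // a const-ref overload is also documented in this patch
    std::cerr << "Failed to predict." << std::endl;
    return -1;
  }

  // label_map stores one uint8 class id per pixel, row-major, with
  // shape = {height, width}; wrap it in a cv::Mat for further processing.
  cv::Mat label(static_cast<int>(res.shape[0]), static_cast<int>(res.shape[1]),
                CV_8UC1, res.label_map.data());
  cv::imwrite("label_map.png", label);

  // Built-in visualization, same as the demos in this patch.
  cv::Mat vis = fastdeploy::vision::VisSegmentation(im, res, 0.5);
  cv::imwrite("visualized_result.jpg", vis);
  return 0;
}
```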
- -## PaddleSeg C++接口 - -### PaddleSeg类 - -```c++ -fastdeploy::vision::segmentation::PaddleSegModel( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PaddleSegModel模型加载和初始化,其中model_file为导出的Paddle模型格式。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **config_file**(str): 推理部署配置文件 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 - -#### Predict函数 - -> ```c++ -> PaddleSegModel::Predict(cv::Mat* im, DetectionResult* result) -> ``` -> -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, SegmentationResult结构体说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) - -### 类成员属性 -#### 预处理参数 -用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 - -> > * **is_vertical_screen**(bool): PP-HumanSeg系列模型通过设置此参数为`true`表明输入图片是竖屏,即height大于width的图片 - -#### 后处理参数 -> > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 - -## 快速链接 -- [PaddleSeg模型介绍](../../) -- [Python部署](../python) - -## 常见问题 -- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) -- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) -- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) -) diff --git a/examples/vision/segmentation/paddleseg/ascend/python/README.md b/examples/vision/segmentation/paddleseg/ascend/python/README.md old mode 100755 new mode 100644 index d37d92c9e..ee11ea7b9 --- a/examples/vision/segmentation/paddleseg/ascend/python/README.md +++ b/examples/vision/segmentation/paddleseg/ascend/python/README.md @@ -1,82 +1,36 @@ -English | [简体中文](README_CN.md) -# PaddleSeg Python Deployment Example +[English](README.md) | 简体中文 +# PaddleSeg Python部署示例 -Before deployment, two steps require confirmation +本目录下提供`infer.py`快速完成PP-LiteSeg在华为昇腾上部署的示例。 -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +## 华为昇腾NPU编译FastDeploy wheel包环境准备 +在部署前,需自行编译基于华为昇腾NPU的FastDeploy python wheel包并安装,参考文档[华为昇腾NPU部署环境编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装) + +>>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../ppmatting)下载 -【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) -This directory provides examples that `infer.py` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. 
The script is as follows ```bash -# Download the deployment example code -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/python +#下载部署示例代码 +cd path/to/paddleseg/ascend/cpp -# Download Unet model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz -tar -xvf Unet_cityscapes_without_argmax_infer.tgz +# 下载PP-LiteSeg模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png -# CPU inference -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device cpu -# GPU inference -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu -# TensorRT inference on GPU(Attention: It is somewhat time-consuming for the operation of model serialization when running TensorRT inference for the first time. Please be patient.) -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu --use_trt True -# kunlunxin XPU inference -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device kunlunxin +# 华为昇腾推理 +python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png ``` -The visualized result after running is as follows +运行完成可视化结果如下图所示
-## PaddleSegModel Python Interface +## 快速链接 +- [PaddleSeg python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/semantic_segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](..) +- [PaddleSeg C++部署](../cpp) -```python -fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) -``` - -PaddleSeg model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) for more information - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -### predict function - -> ```python -> PaddleSegModel.predict(input_image) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **input_image**(np.ndarray): Input data in HWC or BGR format - -> **Return** -> -> > Return `fastdeploy.vision.SegmentationResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of the structure. - -### Class Member Variable -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - -> > * **is_vertical_screen**(bool): For PP-HumanSeg models, the input image is portrait with height greater than width by setting this parameter to `true` -#### Post-processing Parameter -> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported. Set this parameter to `true` to normalize the probability result (score_map) of the predicted output segmentation label (label_map) in softmax - -## Other Documents - -- [PaddleSeg Model Description](..) 
-- [PaddleSeg C++ Deployment](../cpp) -- [Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/vision_result_related_problems.md) diff --git a/examples/vision/segmentation/paddleseg/ascend/python/README_CN.md b/examples/vision/segmentation/paddleseg/ascend/python/README_CN.md deleted file mode 100644 index 909784fd3..000000000 --- a/examples/vision/segmentation/paddleseg/ascend/python/README_CN.md +++ /dev/null @@ -1,79 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg Python部署示例 - -本目录下提供`infer.py`快速完成PP-LiteSeg在华为昇腾上部署的示例。 - -在部署前,需自行编译基于华为昇腾NPU的FastDeploy python wheel包,参考文档[华为昇腾NPU部署环境编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/huawei_ascend.md),编译python wheel包并安装 - ->>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../matting)下载 - - -```bash -#下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/ascend/cpp - -# 下载PP-LiteSeg模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# 华为昇腾推理 -python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png -``` - -运行完成可视化结果如下图所示 -
- -
- -## PaddleSegModel Python接口 - -```python -fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) -``` - -PaddleSeg模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **config_file**(str): 推理部署配置文件 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 - -### predict函数 - -> ```python -> PaddleSegModel.predict(input_image) -> ``` -> -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 - -> **返回** -> -> > 返回`fastdeploy.vision.SegmentationResult`结构体,SegmentationResult结构体说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) - -### 类成员属性 -#### 预处理参数 -用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 - -> > * **is_vertical_screen**(bool): PP-HumanSeg系列模型通过设置此参数为`true`表明输入图片是竖屏,即height大于width的图片 - -#### 后处理参数 -> > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 - -## 快速链接 - -- [PaddleSeg 模型介绍](..) -- [PaddleSeg C++部署](../cpp) - -## 常见问题 -- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) -- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) -- [PaddleSeg python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/semantic_segmentation.html) diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md b/examples/vision/segmentation/paddleseg/cpu-gpu/README.md similarity index 97% rename from examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md rename to examples/vision/segmentation/paddleseg/cpu-gpu/README.md index 0109ac01a..7c9901a9c 100644 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/README_CN.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/README.md @@ -15,7 +15,7 @@ FastDeploy支持在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立 - [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) - [SegFormer系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/segformer/README.md) ->>**注意**】如部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../matting/) +>>**注意** 如部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../ppmatting) ## 准备PaddleSeg部署模型 PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) @@ -23,7 +23,7 @@ PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.co **注意** - PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 -## 下载预训练模型 +## 预导出的推理模型 为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型 - without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README.md b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README.md old mode 100755 new mode 100644 index bcccdd1cb..2c1f54e9a --- a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README.md @@ -1,96 +1,59 @@ -English | [简体中文](README_CN.md) -# PaddleSeg C++ Deployment Example +[English](README.md) | 
简体中文 +# PaddleSeg C++部署示例 -This directory provides examples that `infer.cc` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. +本目录下提供`infer.cc`快速完成PP-LiteSeg在CPU/GPU,以及GPU上通过Paddle-TensorRT加速部署的示例。 -Before deployment, two steps require confirmation +## 部署环境准备 -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +在部署前,需确认软硬件环境,同时下载预编译部署库,参考文档[FastDeploy预编译库安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装) -【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) +>> **注意** 如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../ppmatting) -Taking the inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 1.0.0 or above (x.x.x>=1.0.0) is required to support this model. +以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.0以上(x.x.x>=1.0.0) ```bash +#下载部署示例代码 +cd path/to/paddleseg/cpp-gpu/cpp + mkdir build cd build -# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above +# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz tar xvf fastdeploy-linux-x64-x.x.x.tgz cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x make -j -# Download Unet model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz -tar -xvf Unet_cityscapes_without_argmax_infer.tgz +# 下载PP-LiteSeg模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png -# CPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 0 -# GPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 1 -# TensorRT inference on GPU -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 2 -# kunlunxin XPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 3 +# CPU推理 +./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png 0 +# GPU推理 +./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png 1 +# GPU上Paddle-TensorRT推理 +./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png 2 ``` -The visualized result after running is as follows +运行完成可视化结果如下图所示
-The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to: -- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md) +> **注意:** +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_sdk_on_windows.md) -## PaddleSeg C++ Interface +## 快速链接 +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](../../) +- [Python部署](../python) -### PaddleSeg Class - -```c++ -fastdeploy::vision::segmentation::PaddleSegModel( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PaddleSegModel model loading and initialization, among which model_file is the exported Paddle model format. - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -#### Predict Function - -> ```c++ -> PaddleSegModel::Predict(cv::Mat* im, DetectionResult* result) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **im**: Input images in HWC or BGR format -> > * **result**: The segmentation result, including the predicted label of the segmentation and the corresponding probability of the label. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of SegmentationResult - -### Class Member Variable -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - -> > * **is_vertical_screen**(bool): For PP-HumanSeg models, the input image is portrait, height greater than a width, by setting this parameter to`true` - -#### Post-processing Parameter -> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported. 
Set this parameter to `true` to normalize the probability result (score_map) of the predicted output segmentation label (label_map) - -- [Model Description](../../) -- [Python Deployment](../python) -- [Vision Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) +## 常见问题 +- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) +- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md) +- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md) +- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md) +- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md) diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md deleted file mode 100644 index 7618e6f15..000000000 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/README_CN.md +++ /dev/null @@ -1,106 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg C++部署示例 - -本目录下提供`infer.cc`快速完成PP-LiteSeg在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。 - -在部署前,需确认以下两个步骤 - -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) - -【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../matting) - -以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.0以上(x.x.x>=1.0.0) - -```bash -#下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/cpp-gpu/cpp - -mkdir build -cd build -# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# 下载PP-LiteSeg模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - - -# CPU推理 -./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png 0 -# GPU推理 -./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png 1 -# GPU上TensorRT推理 -./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png 2 -``` - -运行完成可视化结果如下图所示 -
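When the Paddle-TensorRT path is chosen, the `infer.cc` and `infer.py` changes later in this patch also collect and pin dynamic input shapes. A minimal Python sketch of that configuration follows; the shape ranges are copied from the patch and are model-specific, so adjust them for other inputs.

```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_gpu()
option.use_trt_backend()
# Comment out the next two lines to use native TensorRT instead of Paddle-TensorRT
option.enable_paddle_to_trt()
option.enable_paddle_trt_collect_shape()
# min / opt / max shapes for the input tensor named "x"
option.set_trt_input_shape("x", [1, 3, 256, 256], [1, 3, 1024, 1024], [1, 3, 2048, 2048])
```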
- -> **注意:** -以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: -- [如何在Windows中使用FastDeploy C++ SDK](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_sdk_on_windows.md) - -## PaddleSeg C++接口 - -### PaddleSeg类 - -```c++ -fastdeploy::vision::segmentation::PaddleSegModel( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PaddleSegModel模型加载和初始化,其中model_file为导出的Paddle模型格式。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **config_file**(str): 推理部署配置文件 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 - -#### Predict函数 - -> ```c++ -> PaddleSegModel::Predict(const cv::Mat &im, SegmentationResult *result) -> ``` -> -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, SegmentationResult结构体说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) - -### 类成员属性 -#### 预处理参数 -用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 - -> > * **is_vertical_screen**(bool): PP-HumanSeg系列模型通过设置此参数为`true`表明输入图片是竖屏,即height大于width的图片 - -#### 后处理参数 -> > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 - -## 快速链接 -- [PaddleSeg模型介绍](../../) -- [Python部署](../python) - -## 常见问题 -- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) -- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) -- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md) -- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) -- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md) -- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md) diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc index 5269a0c2e..af79d3204 100644 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc @@ -85,7 +85,14 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) { auto option = fastdeploy::RuntimeOption(); option.UseGpu(); option.UseTrtBackend(); - auto model = fastdeploy::vision::segmentation::PaddleSegModel( + // If use original Tensorrt, not Paddle-TensorRT, + // comment the following two lines + option.EnablePaddleToTrt(); + option.EnablePaddleTrtCollectShape(); + option.SetTrtInputShape("x", {1, 3, 256, 256}, {1, 3, 1024, 1024}, + {1, 3, 2048, 2048}) + + auto model = fastdeploy::vision::segmentation::PaddleSegModel( model_file, params_file, config_file, option); if (!model.Initialized()) { diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md old mode 100755 new mode 100644 index d37d92c9e..12d7c7eb1 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README.md @@ 
-1,82 +1,45 @@ -English | [简体中文](README_CN.md) -# PaddleSeg Python Deployment Example +[English](README.md) | 简体中文 +# PaddleSeg Python部署示例 +本目录下提供`infer.py`快速完成PP-LiteSeg在CPU/GPU,以及GPU上通过Paddle-TensorRT加速部署的示例。执行如下脚本即可完成 -Before deployment, two steps require confirmation +## 部署环境准备 -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +在部署前,需确认软硬件环境,同时下载预编译python wheel 包,参考文档[FastDeploy预编译库安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装) -【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) +【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../ppmatting) -This directory provides examples that `infer.py` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. The script is as follows ```bash -# Download the deployment example code +#下载部署示例代码 git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/python +cd FastDeploy/examples/vision/segmentation/paddleseg/cpu-gpu/python -# Download Unet model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz -tar -xvf Unet_cityscapes_without_argmax_infer.tgz +# 下载Unet模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png -# CPU inference -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device cpu -# GPU inference -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu -# TensorRT inference on GPU(Attention: It is somewhat time-consuming for the operation of model serialization when running TensorRT inference for the first time. Please be patient.) -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu --use_trt True -# kunlunxin XPU inference -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device kunlunxin +# CPU推理 +python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png --device cpu +# GPU推理 +python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu +# GPU上使用Paddle-TensorRT推理 (注意:Paddle-TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待) +python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu --use_trt True ``` -The visualized result after running is as follows +运行完成可视化结果如下图所示
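For reference, the inference part of `infer.py` roughly amounts to loading the three exported files and visualising the returned `SegmentationResult`. A sketch is shown below; the file names follow the model package downloaded above, and the `weight=0.5` blending factor is an arbitrary choice.

```python
import cv2
import fastdeploy as fd

model_dir = "PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer"
model = fd.vision.segmentation.PaddleSegModel(
    model_dir + "/model.pdmodel",
    model_dir + "/model.pdiparams",
    model_dir + "/deploy.yaml")

im = cv2.imread("cityscapes_demo.png")
result = model.predict(im)                          # fastdeploy.vision.SegmentationResult
vis = fd.vision.vis_segmentation(im, result, weight=0.5)
cv2.imwrite("vis_result.png", vis)
```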
-## PaddleSegModel Python Interface +## 快速链接 +- [PaddleSeg python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/semantic_segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](..) +- [PaddleSeg C++部署](../cpp) -```python -fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) -``` - -PaddleSeg model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) for more information - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -### predict function - -> ```python -> PaddleSegModel.predict(input_image) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **input_image**(np.ndarray): Input data in HWC or BGR format - -> **Return** -> -> > Return `fastdeploy.vision.SegmentationResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of the structure. - -### Class Member Variable -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - -> > * **is_vertical_screen**(bool): For PP-HumanSeg models, the input image is portrait with height greater than width by setting this parameter to `true` -#### Post-processing Parameter -> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported. Set this parameter to `true` to normalize the probability result (score_map) of the predicted output segmentation label (label_map) in softmax - -## Other Documents - -- [PaddleSeg Model Description](..) -- [PaddleSeg C++ Deployment](../cpp) -- [Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/vision_result_related_problems.md) +- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) +- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md) +- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md) +- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md) +- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md) diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md b/examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md deleted file mode 100644 index da23623b2..000000000 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/python/README_CN.md +++ /dev/null @@ -1,88 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg Python部署示例 - -在部署前,需确认以下两个步骤 - -- 1. 
软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) - -【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../matting) - -本目录下提供`infer.py`快速完成PP-LiteSeg在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成 - -```bash -#下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/cpu-gpu/python - -# 下载Unet模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# CPU推理 -python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png --device cpu -# GPU推理 -python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu -# GPU上使用TensorRT推理 (注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待) -python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu --use_trt True -``` - -运行完成可视化结果如下图所示 -
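Several FAQ links in this patch cover converting the prediction into numpy arrays for custom post-processing. A sketch is given below, assuming `label_map`/`score_map` are flat lists and `shape` is `[height, width]` as described in the SegmentationResult docs; treat `contain_score_map` as an assumption if your FastDeploy version exposes the result differently.

```python
import numpy as np

# `result` is the SegmentationResult returned by model.predict(im)
h, w = result.shape
label_map = np.array(result.label_map, dtype=np.uint8).reshape(h, w)
# score_map is only populated for models exported to keep per-pixel scores
if result.contain_score_map:
    score_map = np.array(result.score_map, dtype=np.float32).reshape(h, w)
```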
- -## PaddleSegModel Python接口 - -```python -fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) -``` - -PaddleSeg模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **config_file**(str): 推理部署配置文件 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 - -### predict函数 - -> ```python -> PaddleSegModel.predict(input_image) -> ``` -> -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 - -> **返回** -> -> > 返回`fastdeploy.vision.SegmentationResult`结构体,结构体说明参考文档[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) - -### 类成员属性 -#### 预处理参数 -用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 - -> > * **is_vertical_screen**(bool): PP-HumanSeg系列模型通过设置此参数为`true`表明输入图片是竖屏,即height大于width的图片 - -#### 后处理参数 -> > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 - -## 其它文档 - -- [PaddleSeg 模型介绍](..) -- [PaddleSeg C++部署](../cpp) - -## 常见问题 -- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) -- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) -- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md) -- [PaddleSeg python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/semantic_segmentation.html) -- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md) -- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md) diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/python/infer.py b/examples/vision/segmentation/paddleseg/cpu-gpu/python/infer.py index ba961159f..d90f6eb4c 100755 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/python/infer.py +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/python/infer.py @@ -32,6 +32,10 @@ def build_option(args): if args.use_trt: option.use_trt_backend() + # If use original Tensorrt, not Paddle-TensorRT, + # comment the following two lines + option.enable_paddle_to_trt() + option.enable_paddle_trt_collect_shape() option.set_trt_input_shape("x", [1, 3, 256, 256], [1, 3, 1024, 1024], [1, 3, 2048, 2048]) return option diff --git a/examples/vision/segmentation/paddleseg/kunlun/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/README.md similarity index 97% rename from examples/vision/segmentation/paddleseg/kunlun/README_CN.md rename to examples/vision/segmentation/paddleseg/kunlun/README.md index 5fba79c12..08406d082 100644 --- a/examples/vision/segmentation/paddleseg/kunlun/README_CN.md +++ b/examples/vision/segmentation/paddleseg/kunlun/README.md @@ -13,7 +13,7 @@ - [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) - [SegFormer系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/segformer/README.md) ->>**注意** 若需要在华为昇腾上部署**PP-Matting**、**PP-HumanMatting**请从[Matting模型部署](../../matting/)下载对应模型,部署过程与此文档一致 +>>**注意** 
若需要在华为昇腾上部署**PP-Matting**、**PP-HumanMatting**请从[Matting模型部署](../../ppmating/)下载对应模型,部署过程与此文档一致 ## 准备PaddleSeg部署模型 PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) @@ -21,7 +21,7 @@ PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.co **注意** - PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 -## 下载预训练模型 +## 预导出的推理模型 为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型 - without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` diff --git a/examples/vision/segmentation/paddleseg/kunlun/cpp/README.md b/examples/vision/segmentation/paddleseg/kunlun/cpp/README.md old mode 100755 new mode 100644 index bcccdd1cb..b85e3874e --- a/examples/vision/segmentation/paddleseg/kunlun/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/kunlun/cpp/README.md @@ -1,96 +1,39 @@ -English | [简体中文](README_CN.md) -# PaddleSeg C++ Deployment Example +[English](README.md) | 简体中文 +# PaddleSeg C++部署示例 -This directory provides examples that `infer.cc` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. +本目录下提供`infer.cc`快速完成PP-LiteSeg在华为昇腾上部署的示例。 -Before deployment, two steps require confirmation +## 昆仑芯XPU编译FastDeploy环境准备 +在部署前,需自行编译基于昆仑芯XPU的预测库,参考文档[昆仑芯XPU部署环境编译安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装) -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) - -Taking the inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 1.0.0 or above (x.x.x>=1.0.0) is required to support this model. +>>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../matting)下载 ```bash +#下载部署示例代码 +cd path/to/paddleseg/ascend/cpp + mkdir build cd build -# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +# 使用编译完成的FastDeploy库编译infer_demo +cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-ascend make -j -# Download Unet model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz -tar -xvf Unet_cityscapes_without_argmax_infer.tgz +# 下载PP-LiteSeg模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# CPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 0 -# GPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 1 -# TensorRT inference on GPU -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 2 -# kunlunxin XPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 3 +# 华为昇腾推理 +./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png ``` -The visualized result after running is as follows +运行完成可视化结果如下图所示
-The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to: -- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md) - -## PaddleSeg C++ Interface - -### PaddleSeg Class - -```c++ -fastdeploy::vision::segmentation::PaddleSegModel( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PaddleSegModel model loading and initialization, among which model_file is the exported Paddle model format. - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -#### Predict Function - -> ```c++ -> PaddleSegModel::Predict(cv::Mat* im, DetectionResult* result) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **im**: Input images in HWC or BGR format -> > * **result**: The segmentation result, including the predicted label of the segmentation and the corresponding probability of the label. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of SegmentationResult - -### Class Member Variable -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - -> > * **is_vertical_screen**(bool): For PP-HumanSeg models, the input image is portrait, height greater than a width, by setting this parameter to`true` - -#### Post-processing Parameter -> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported. Set this parameter to `true` to normalize the probability result (score_map) of the predicted output segmentation label (label_map) - -- [Model Description](../../) -- [Python Deployment](../python) -- [Vision Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) +## 快速链接 +how_to_change_backend.md) +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](../../) +- [Python部署](../python) diff --git a/examples/vision/segmentation/paddleseg/kunlun/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/cpp/README_CN.md deleted file mode 100644 index 55c6996fc..000000000 --- a/examples/vision/segmentation/paddleseg/kunlun/cpp/README_CN.md +++ /dev/null @@ -1,88 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg C++部署示例 - -本目录下提供`infer.cc`快速完成PP-LiteSeg在华为昇腾上部署的示例。 - -在部署前,需自行编译基于昆仑芯XPU的预测库,参考文档[昆仑芯XPU部署环境编译安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/kunlunxin.md) - ->>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../matting)下载 - -```bash -#下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/ascend/cpp - -mkdir build -cd build -# 使用编译完成的FastDeploy库编译infer_demo -cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-ascend -make -j - -# 下载PP-LiteSeg模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# 华为昇腾推理 -./infer_demo PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer cityscapes_demo.png -``` - -运行完成可视化结果如下图所示 -
- -## PaddleSeg C++接口 - -### PaddleSeg类 - -```c++ -fastdeploy::vision::segmentation::PaddleSegModel( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PaddleSegModel模型加载和初始化,其中model_file为导出的Paddle模型格式。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **config_file**(str): 推理部署配置文件 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 - -#### Predict函数 - -> ```c++ -> PaddleSegModel::Predict(cv::Mat* im, DetectionResult* result) -> ``` -> -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, SegmentationResult结构体说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) - -### 类成员属性 -#### 预处理参数 -用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 - -> > * **is_vertical_screen**(bool): PP-HumanSeg系列模型通过设置此参数为`true`表明输入图片是竖屏,即height大于width的图片 - -#### 后处理参数 -> > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 - -## 快速链接 -- [PaddleSeg模型介绍](../../) -- [Python部署](../python) - -## 常见问题 -- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) -- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) -- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) -) diff --git a/examples/vision/segmentation/paddleseg/kunlun/python/README.md b/examples/vision/segmentation/paddleseg/kunlun/python/README.md old mode 100755 new mode 100644 index d37d92c9e..aee36bffe --- a/examples/vision/segmentation/paddleseg/kunlun/python/README.md +++ b/examples/vision/segmentation/paddleseg/kunlun/python/README.md @@ -1,82 +1,37 @@ -English | [简体中文](README_CN.md) -# PaddleSeg Python Deployment Example +[English](README.md) | 简体中文 +# PaddleSeg Python部署示例 -Before deployment, two steps require confirmation +本目录下提供`infer.py`快速完成PP-LiteSeg在华为昇腾上部署的示例。 -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +## 昆仑XPU编译FastDeploy wheel包环境准备 + +在部署前,需自行编译基于昆仑XPU的FastDeploy python wheel包并安装,参考文档[昆仑芯XPU部署环境](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装) + +>>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../ppmatting)下载 -【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) -This directory provides examples that `infer.py` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. 
The script is as follows ```bash -# Download the deployment example code -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/python +#下载部署示例代码 +cd path/to/paddleseg/ascend/cpp -# Download Unet model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz -tar -xvf Unet_cityscapes_without_argmax_infer.tgz +# 下载PP-LiteSeg模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png -# CPU inference -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device cpu -# GPU inference -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu -# TensorRT inference on GPU(Attention: It is somewhat time-consuming for the operation of model serialization when running TensorRT inference for the first time. Please be patient.) -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device gpu --use_trt True -# kunlunxin XPU inference -python infer.py --model Unet_cityscapes_without_argmax_infer --image cityscapes_demo.png --device kunlunxin +# 华为昇腾推理 +python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png ``` -The visualized result after running is as follows +运行完成可视化结果如下图所示
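Inside `infer.py`, the only device-specific piece is the runtime option. A sketch of a KunlunXin XPU configuration is shown below; `use_kunlunxin` is assumed from the FastDeploy RuntimeOption API and should be verified against the self-compiled wheel.

```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_kunlunxin()   # assumed name of the KunlunXin XPU device option; check your wheel

model_dir = "PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer"
model = fd.vision.segmentation.PaddleSegModel(
    model_dir + "/model.pdmodel",
    model_dir + "/model.pdiparams",
    model_dir + "/deploy.yaml",
    runtime_option=option)
```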
-## PaddleSegModel Python Interface +## 快速链接 +- [PaddleSeg python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/semantic_segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](..) +- [PaddleSeg C++部署](../cpp) -```python -fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) -``` - -PaddleSeg model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) for more information - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -### predict function - -> ```python -> PaddleSegModel.predict(input_image) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **input_image**(np.ndarray): Input data in HWC or BGR format - -> **Return** -> -> > Return `fastdeploy.vision.SegmentationResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of the structure. - -### Class Member Variable -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - -> > * **is_vertical_screen**(bool): For PP-HumanSeg models, the input image is portrait with height greater than width by setting this parameter to `true` -#### Post-processing Parameter -> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported. Set this parameter to `true` to normalize the probability result (score_map) of the predicted output segmentation label (label_map) in softmax - -## Other Documents - -- [PaddleSeg Model Description](..) 
-- [PaddleSeg C++ Deployment](../cpp) -- [Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/vision_result_related_problems.md) diff --git a/examples/vision/segmentation/paddleseg/kunlun/python/README_CN.md b/examples/vision/segmentation/paddleseg/kunlun/python/README_CN.md deleted file mode 100644 index 7ce98b44e..000000000 --- a/examples/vision/segmentation/paddleseg/kunlun/python/README_CN.md +++ /dev/null @@ -1,79 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg Python部署示例 - -本目录下提供`infer.py`快速完成PP-LiteSeg在华为昇腾上部署的示例。 - -在部署前,需自行编译基于昆仑芯XPU的FastDeploy wheel 包,参考文档[昆仑芯XPU部署环境编译安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/kunlunxin.md),编译python wheel包并安装 - ->>**注意** **PP-Matting**、**PP-HumanMatting**的模型,请从[Matting模型部署](../../../matting)下载 - - -```bash -#下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/ascend/cpp - -# 下载PP-LiteSeg模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -tar -xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# 华为昇腾推理 -python infer.py --model PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer --image cityscapes_demo.png -``` - -运行完成可视化结果如下图所示 -
- -## PaddleSegModel Python接口 - -```python -fd.vision.segmentation.PaddleSegModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) -``` - -PaddleSeg模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **config_file**(str): 推理部署配置文件 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 - -### predict函数 - -> ```python -> PaddleSegModel.predict(input_image) -> ``` -> -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 - -> **返回** -> -> > 返回`fastdeploy.vision.SegmentationResult`结构体,SegmentationResult结构体说明参考[SegmentationResult结构体介绍](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) - -### 类成员属性 -#### 预处理参数 -用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 - -> > * **is_vertical_screen**(bool): PP-HumanSeg系列模型通过设置此参数为`true`表明输入图片是竖屏,即height大于width的图片 - -#### 后处理参数 -> > * **apply_softmax**(bool): 当模型导出时,并未指定`apply_softmax`参数,可通过此设置此参数为`true`,将预测的输出分割标签(label_map)对应的概率结果(score_map)做softmax归一化处理 - -## 快速链接 - -- [PaddleSeg 模型介绍](..) -- [PaddleSeg C++部署](../cpp) - -## 常见问题 -- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) -- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) -- [PaddleSeg python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/semantic_segmentation.html) diff --git a/examples/vision/segmentation/paddleseg/quantize/README.md b/examples/vision/segmentation/paddleseg/quantize/README.md old mode 100755 new mode 100644 index ab0fa77fc..708d21e93 --- a/examples/vision/segmentation/paddleseg/quantize/README.md +++ b/examples/vision/segmentation/paddleseg/quantize/README.md @@ -1,37 +1,26 @@ -English | [简体中文](README_CN.md) -# PaddleSeg Quantized Model Deployment -FastDeploy already supports the deployment of quantitative models and provides a tool to automatically compress model with just one click. -You can use the one-click automatical model compression tool to quantify and deploy the models, or directly download the quantified models provided by FastDeploy for deployment. +[English](README.md) | 简体中文 +# PaddleSeg 量化模型部署 +FastDeploy已支持部署量化模型,并提供一键模型自动化压缩的工具. +用户可以使用一键模型自动化压缩工具,自行对模型量化后部署, 也可以直接下载FastDeploy提供的量化模型进行部署. -## FastDeploy One-Click Automation Model Compression Tool -FastDeploy provides an one-click automatical model compression tool that can quantify a model simply by entering configuration file. -For details, please refer to [one-click automatical compression tool](../../../../../tools/common_tools/auto_compression/). -Note: The quantized classification model still needs the deploy.yaml file in the FP32 model folder. Self-quantized model folder does not contain this yaml file, you can copy it from the FP32 model folder to the quantized model folder. +## FastDeploy一键模型自动化压缩工具 +FastDeploy 提供了一键模型自动化压缩工具, 能够简单地通过输入一个配置文件, 对模型进行量化. 
+详细教程请见: [一键模型自动化压缩工具](https://github.com/PaddlePaddle/FastDeploy/tree/develop/tools/common_tools/auto_compression) +>> **注意**: 推理量化后的分类模型仍然需要FP32模型文件夹下的deploy.yaml文件, 自行量化的模型文件夹内不包含此yaml文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。 -## Download the Quantized PaddleSeg Model -You can also directly download the quantized models in the following table for deployment (click model name to download). +## 量化完成的PaddleSeg模型 +用户也可以直接下载下表中的量化模型进行部署.(点击模型名字即可下载) -Note: -- Runtime latency is the inference latency of the model on various Runtimes, including CPU->GPU data copy, GPU inference, and GPU->CPU data copy time. It does not include the respective pre and post processing time of the models. -- The end-to-end latency is the latency of the model in the actual inference scenario, including the pre and post processing of the model. -- The measured latencies are averaged over 1000 inferences, in milliseconds. -- INT8 + FP16 is to enable the FP16 inference option for Runtime while inferring the INT8 quantization model. -- INT8 + FP16 + PM is the option to use Pinned Memory while inferring INT8 quantization model and turning on FP16, which can speed up the GPU->CPU data copy speed. -- The maximum speedup ratio is obtained by dividing the FP32 latency by the fastest INT8 inference latency. -- The strategy is quantitative distillation training, using a small number of unlabeled data sets to train the quantitative model, and verify the accuracy on the full validation set, INT8 accuracy does not represent the highest INT8 accuracy. -- The CPU is Intel(R) Xeon(R) Gold 6271C with a fixed CPU thread count of 1 in all tests. The GPU is Tesla T4, TensorRT version 8.4.15. +| 模型 | 量化方式 | +| [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_QAT_new.tar) |量化蒸馏训练 | -#### Runtime Benchmark -| Model |Inference Backends | Hardware | FP32 Runtime Latency | INT8 Runtime Latency | INT8 + FP16 Runtime Latency | INT8+FP16+PM Runtime Latency | Max Speedup | FP32 mIoU | INT8 mIoU | Method | -| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- | -| [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_QAT_new.tar) | Paddle Inference | CPU | 1138.04| 602.62 |None|None | 1.89 |77.37 | 71.62 |Quantaware Distillation Training | +量化后模型的Benchmark比较,请参考[量化模型 Benchmark](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/quantize.md) -#### End to End Benchmark -| Model |Inference Backends | Hardware | FP32 End2End Latency | INT8 End2End Latency | INT8 + FP16 End2End Latency | INT8+FP16+PM End2End Latency | Max Speedup | FP32 mIoU | INT8 mIoU | Method | -| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- | -| [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_QAT_new.tar) | Paddle Inference | CPU | 4726.65| 4134.91|None|None | 1.14 |77.37 | 71.62 |Quantaware Distillation Training| +## 支持部署量化模型的硬件 +FastDeploy 量化模型部署的过程大致都与FP32模型类似,只是模型量化与非量化的区别,如果硬件在量化模型部署过程有特殊处理,也会在文档中特别标明,因此量化模型部署可以参考如下硬件的链接 -## Detailed Deployment Documents - -- [Python Deployment](python) -- [C++ Deployment](cpp) +| 硬件支持列表 | | | | +|:----- | :-- | :-- | :-- | +| [NVIDIA GPU](cpu-gpu) | [X86 CPU](cpu-gpu)| [飞腾CPU](cpu-gpu) | [ARM CPU](cpu-gpu) | +| [Intel GPU(独立显卡/集成显卡)](cpu-gpu) | 
[昆仑](kunlun) | [昇腾](ascend) | [瑞芯微](rockchip) | +| [晶晨](amlogic) | [算能](sophgo) | diff --git a/examples/vision/segmentation/paddleseg/quantize/README_CN.md b/examples/vision/segmentation/paddleseg/quantize/README_CN.md deleted file mode 100644 index a9b287754..000000000 --- a/examples/vision/segmentation/paddleseg/quantize/README_CN.md +++ /dev/null @@ -1,26 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg 量化模型部署 -FastDeploy已支持部署量化模型,并提供一键模型自动化压缩的工具. -用户可以使用一键模型自动化压缩工具,自行对模型量化后部署, 也可以直接下载FastDeploy提供的量化模型进行部署. - -## FastDeploy一键模型自动化压缩工具 -FastDeploy 提供了一键模型自动化压缩工具, 能够简单地通过输入一个配置文件, 对模型进行量化. -详细教程请见: [一键模型自动化压缩工具](https://github.com/PaddlePaddle/FastDeploy/tree/develop/tools/common_tools/auto_compression) ->> **注意**: 推理量化后的分类模型仍然需要FP32模型文件夹下的deploy.yaml文件, 自行量化的模型文件夹内不包含此yaml文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可。 - -## 量化完成的PaddleSeg模型 -用户也可以直接下载下表中的量化模型进行部署.(点击模型名字即可下载) - -| 模型 | 量化方式 | -| [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_QAT_new.tar) |量化蒸馏训练 | - -量化后模型的Benchmark比较,请参考[量化模型 Benchmark](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/quantize.md) - -## 支持部署量化模型的硬件 -FastDeploy 量化模型部署的过程大致都与FP32模型类似,只是模型量化与非量化的区别,如果硬件在量化模型部署过程有特殊处理,也会在文档中特别标明,因此量化模型部署可以参考如下硬件的链接 -- [NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU](../cpu-gpu) -- [昆仑](../kunlun) -- [升腾](../ascend) -- [瑞芯微](../rockchip) -- [晶晨](../amlogic) -- [算能](../sophgo) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md index 40606fee0..f6d1a0b12 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md @@ -1,34 +1,63 @@ -English | [简体中文](README_CN.md) -# PaddleSeg Model Deployment +[English](README.md) | 简体中文 -## Model Version +# 基于RKNPU2使用FastDeploy部署PaddleSeg模型 +RKNPU2 提供了一个高性能接口来访问 Rockchip NPU,支持如下硬件的部署 +- RK3566/RK3568 +- RK3588/RK3588S +- RV1103/RV1106 + +本示例基于 RV3588 来介绍如何使用 FastDeploy 部署 PaddleSeg 模型 + +## 模型版本说明 - [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) -Currently FastDeploy using RKNPU2 to infer PPSeg supports the following model deployments: +目前FastDeploy使用RKNPU2推理PaddleSeg支持如下模型的部署: +- [U-Net系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/unet/README.md) +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) +- [PP-HumanSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README.md) +- [FCN系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/fcn/README.md) +- [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) -| Model | Parameter File Size | Input Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | -|:---------------------------------------------------------------------------------------------------------------------------------------------|:-------|:---------|:-------|:------------|:---------------| +## 准备PaddleSeg部署模型 +PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) + +**注意** +- PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 + +## 下载预训练模型 + +为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型 +- without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` +- with-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op argmax` + +开发者可直接下载使用。 + +| 模型 | 
参数文件大小 | 输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:----------------|:-------|:---------|:-------|:------------|:---------------| | [Unet-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | 52MB | 1024x512 | 65.00% | 66.02% | 66.89% | | [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer.tgz) | 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | -| [PP-HumanSegV1-Lite(Universal portrait segmentation model)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) | 543KB | 192x192 | 86.2% | - | - | -| [PP-HumanSegV2-Lite(Universal portrait segmentation model)](https://bj.bcebos.com/paddle2onnx/libs/PP_HumanSegV2_Lite_192x192_infer.tgz) | 12MB | 192x192 | 92.52% | - | - | -| [PP-HumanSegV2-Mobile(Universal portrait segmentation model)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) | 29MB | 192x192 | 93.13% | - | - | -| [PP-HumanSegV1-Server(Universal portrait segmentation model)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) | 103MB | 512x512 | 96.47% | - | - | -| [Portait-PP-HumanSegV2_Lite(Portrait segmentation model)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) | 3.6M | 256x144 | 96.63% | - | - | +| [PP-HumanSegV1-Lite(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) | 543KB | 192x192 | 86.2% | - | - | +| [PP-HumanSegV2-Lite(通用人像分割模型)](https://bj.bcebos.com/paddle2onnx/libs/PP_HumanSegV2_Lite_192x192_infer.tgz) | 12MB | 192x192 | 92.52% | - | - | +| [PP-HumanSegV2-Mobile(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) | 29MB | 192x192 | 93.13% | - | - | +| [PP-HumanSegV1-Server(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) | 103MB | 512x512 | 96.47% | - | - | +| [Portait-PP-HumanSegV2_Lite(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) | 3.6M | 256x144 | 96.63% | - | - | | [FCN-HRNet-W18-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz) | 37MB | 1024x512 | 78.97% | 79.49% | 79.74% | | [Deeplabv3-ResNet101-OS8-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) | 150MB | 1024x512 | 79.90% | 80.22% | 80.47% | -## Prepare PaddleSeg Deployment Model and Conversion Model -RKNPU needs to convert the Paddle model to RKNN model before deploying, the steps are as follows: -* For the conversion of Paddle dynamic diagram model to ONNX model, please refer to [PaddleSeg Model Export](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/contrib/PP-HumanSeg). -* For the process of converting ONNX model to RKNN model, please refer to [Conversion document](../../../../../docs/en/faq/rknpu2/export.md). 
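The conversion chain above is Paddle → ONNX → RKNN. As an illustration only, a minimal rknn-toolkit2 sketch of the last step is given below; it bypasses FastDeploy's own export tooling, and the file names, normalization values, and target platform are assumptions that must match your exported model, its `deploy.yaml` preprocessing, and your board.

```python
from rknn.api import RKNN   # rknn-toolkit2

rknn = RKNN()
# Fold normalization into the model so FastDeploy can skip normalize/permute at runtime.
# The mean/std values and target platform below are assumptions.
rknn.config(mean_values=[[127.5, 127.5, 127.5]],
            std_values=[[127.5, 127.5, 127.5]],
            target_platform="rk3588")
rknn.load_onnx(model="Portrait_PP_HumanSegV2_Lite_256x144_infer.onnx")
rknn.build(do_quantization=False)
rknn.export_rknn("Portrait_PP_HumanSegV2_Lite_256x144_infer_rk3588.rknn")
rknn.release()
```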
+## 准备PaddleSeg部署模型以及转换模型 +RKNPU部署模型前需要将Paddle模型转换成RKNN模型,具体步骤如下: +* PaddleSeg训练模型导出为推理模型,请参考[PaddleSeg模型导出说明](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md),也可以使用上表中的FastDeploy的预导出模型 +* Paddle模型转换为ONNX模型,请参考[Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) +* ONNX模型转换RKNN模型的过程,请参考[转换文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/export.md)进行转换。 -## An example of Model Conversion +上述步骤可参考以下具体示例 -* [PPHumanSeg](./pp_humanseg_EN.md) +## 模型转换example -## Detailed Deployment Document -- [Overall RKNN Deployment Guidance](../../../../../docs/en/faq/rknpu2/rknpu2.md) -- [Deploy with C++](cpp) -- [Deploy with Python](python) +* [PP-HumanSeg](./pp_humanseg.md) + +## 详细部署文档 +- [RKNN总体部署教程](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md) +- [C++部署](cpp) +- [Python部署](python) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README_CN.md deleted file mode 100644 index b7a1be32a..000000000 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README_CN.md +++ /dev/null @@ -1,55 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg 模型部署 - -## 模型版本说明 - -- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) - -目前FastDeploy使用RKNPU2推理PPSeg支持如下模型的部署: -- [U-Net系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/unet/README.md) -- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) -- [PP-HumanSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README.md) -- [FCN系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/fcn/README.md) -- [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) - -## 准备PaddleSeg部署模型 -PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) - -**注意** -- PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 - -## 下载预训练模型 - -为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型 -- without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` -- with-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op argmax` - -开发者可直接下载使用。 - -| 模型 | 参数文件大小 | 输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | -|:----------------|:-------|:---------|:-------|:------------|:---------------| -| [Unet-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | 52MB | 1024x512 | 65.00% | 66.02% | 66.89% | -| [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer.tgz) | 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | -| [PP-HumanSegV1-Lite(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) | 543KB | 192x192 | 86.2% | - | - | -| [PP-HumanSegV2-Lite(通用人像分割模型)](https://bj.bcebos.com/paddle2onnx/libs/PP_HumanSegV2_Lite_192x192_infer.tgz) | 12MB | 192x192 | 92.52% | - | - | -| [PP-HumanSegV2-Mobile(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) | 29MB | 192x192 | 93.13% | - | - | -| [PP-HumanSegV1-Server(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) | 103MB | 512x512 | 96.47% | - | - | -| [Portait-PP-HumanSegV2_Lite(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) | 3.6M | 256x144 
| 96.63% | - | - | -| [FCN-HRNet-W18-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz) | 37MB | 1024x512 | 78.97% | 79.49% | 79.74% | -| [Deeplabv3-ResNet101-OS8-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) | 150MB | 1024x512 | 79.90% | 80.22% | 80.47% | - -## 准备PaddleSeg部署模型以及转换模型 -RKNPU部署模型前需要将Paddle模型转换成RKNN模型,具体步骤如下: -* PaddleSeg训练模型导出为推理模型,请参考[PaddleSeg模型导出说明](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md),也可以使用上表中的FastDeploy的预导出模型 -* Paddle模型转换为ONNX模型,请参考[Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) -* ONNX模型转换RKNN模型的过程,请参考[转换文档](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/export.md)进行转换。 -上述步骤可以可参考以下具体示例 - -## 模型转换example - -* [PPHumanSeg](./pp_humanseg.md) - -## 详细部署文档 -- [RKNN总体部署教程](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md) -- [C++部署](cpp) -- [Python部署](python) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md index 1fe268f81..b6d4c18bf 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md @@ -1,31 +1,31 @@ -English | [简体中文](README_CN.md) -# PaddleSeg Deployment Examples for C++ +[English](README.md) | 简体中文 +# PaddleSeg C++部署示例 -This directory demonstrates the deployment of PaddleSeg series models on RKNPU2. The following deployment process takes PHumanSeg as an example. +本目录下用于展示PaddleSeg系列模型在RKNPU2上的部署,以下的部署过程以PPHumanSeg为例子。 -Before deployment, the following two steps need to be confirmed: +在部署前,需确认以下两个步骤: -1. Hardware and software environment meets the requirements. -2. Download the pre-compiled deployment repository or compile the FastDeploy repository from scratch according to the development environment. +1. 软硬件环境满足要求 +2. 根据开发环境,下载预编译部署库或者从头编译FastDeploy仓库 -For the above steps, please refer to [How to Build RKNPU2 Deployment Environment](../../../../../../docs/en/build_and_install/rknpu2.md). +以上步骤请参考[RK2代NPU部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)实现 -## Generate Basic Directory Files +## 生成基本目录文件 -The routine consists of the following parts: +该例程由以下几个部分组成 ```text . ├── CMakeLists.txt -├── build # Compile Folder -├── image # Folder for images +├── build # 编译文件夹 +├── image # 存放图片的文件夹 ├── infer_cpu_npu.cc ├── infer_cpu_npu.h ├── main.cc -├── model # Folder for models -└── thirdpartys # Folder for sdk +├── model # 存放模型文件的文件夹 +└── thirdpartys # 存放sdk的文件夹 ``` -First, please build a directory structure +首先需要先生成目录结构 ```bash mkdir build mkdir images @@ -33,23 +33,23 @@ mkdir model mkdir thirdpartys ``` -## Compile +## 编译 -### Compile and Copy SDK to folder thirdpartys +### 编译并拷贝SDK到thirdpartys文件夹 -Please refer to [How to Build RKNPU2 Deployment Environment](../../../../../../docs/en/build_and_install/rknpu2.md) to compile SDK.After compiling, the fastdeploy-0.0.3 directory will be created in the build directory, please move it to the thirdpartys directory. +请参考[RK2代NPU部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-x-x-x目录,请移动它至thirdpartys目录下. 
-### Copy model and configuration files to folder Model -In the process of Paddle dynamic map model -> Paddle static map model -> ONNX mdoel, ONNX file and the corresponding yaml configuration file will be generated. Please move the configuration file to the folder model. -After converting to RKNN, the model file also needs to be copied to folder model. Run the following command to download and use (the model file is RK3588. RK3568 needs to be [reconverted to PPSeg RKNN model](../README.md)). +### 拷贝模型文件,以及配置文件至model文件夹 +在Paddle动态图模型 -> Paddle静态图模型 -> ONNX模型的过程中,将生成ONNX文件以及对应的yaml配置文件,请将配置文件存放到model文件夹内。 +转换为RKNN后的模型文件也需要拷贝至model,输入以下命令下载使用(模型文件为RK3588,RK3568需要重新[转换PPSeg RKNN模型](../README.md))。 -### Prepare Test Images to folder image +### 准备测试图片至image文件夹 ```bash wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip unzip -qo images.zip ``` -### Compile example +### 编译example ```bash cd build @@ -58,16 +58,19 @@ make -j8 make install ``` -## Running Routines +## 运行例程 ```bash cd ./build/install ./rknpu_test model/Portrait_PP_HumanSegV2_Lite_256x144_infer/ images/portrait_heng.jpg ``` -## Notes -The input requirement for the model on RKNPU is to use NHWC format, and image normalization will be embedded into the model when converting the RKNN model, so we need to call DisableNormalizeAndPermute(C++) or disable_normalize_and_permute(Python) first when deploying with FastDeploy to disable normalization and data format conversion in the preprocessing stage. +## 注意事项 +RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作会在转RKNN模型时,内嵌到模型中,因此我们在使用FastDeploy部署时,需要先调用DisableNormalizeAndPermute(C++)或`disable_normalize_and_permute(Python),在预处理阶段禁用归一化以及数据格式的转换 -- [Model Description](../../) -- [Python Deployment](../python) -- [Convert PPSeg and RKNN model](../README.md) +## 快速链接 +- [FastDeploy部署PaddleSeg模型概览](../../) +- [Python部署](../python) +- [转换PPSeg RKNN模型文档](../README.md) +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README_CN.md deleted file mode 100644 index 45bb923a0..000000000 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README_CN.md +++ /dev/null @@ -1,73 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg C++部署示例 - -本目录下用于展示PaddleSeg系列模型在RKNPU2上的部署,以下的部署过程以PPHumanSeg为例子。 - -在部署前,需确认以下两个步骤: - -1. 软硬件环境满足要求 -2. 根据开发环境,下载预编译部署库或者从头编译FastDeploy仓库 - -以上步骤请参考[RK2代NPU部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)实现 - -## 生成基本目录文件 - -该例程由以下几个部分组成 -```text -. -├── CMakeLists.txt -├── build # 编译文件夹 -├── image # 存放图片的文件夹 -├── infer_cpu_npu.cc -├── infer_cpu_npu.h -├── main.cc -├── model # 存放模型文件的文件夹 -└── thirdpartys # 存放sdk的文件夹 -``` - -首先需要先生成目录结构 -```bash -mkdir build -mkdir images -mkdir model -mkdir thirdpartys -``` - -## 编译 - -### 编译并拷贝SDK到thirdpartys文件夹 - -请参考[RK2代NPU部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-x-x-x目录,请移动它至thirdpartys目录下. - -### 拷贝模型文件,以及配置文件至model文件夹 -在Paddle动态图模型 -> Paddle静态图模型 -> ONNX模型的过程中,将生成ONNX文件以及对应的yaml配置文件,请将配置文件存放到model文件夹内。 -转换为RKNN后的模型文件也需要拷贝至model,输入以下命令下载使用(模型文件为RK3588,RK3568需要重新[转换PPSeg RKNN模型](../README.md))。 - -### 准备测试图片至image文件夹 -```bash -wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip -unzip -qo images.zip -``` - -### 编译example - -```bash -cd build -cmake .. 
-make -j8 -make install -``` - -## 运行例程 - -```bash -cd ./build/install -./rknpu_test model/Portrait_PP_HumanSegV2_Lite_256x144_infer/ images/portrait_heng.jpg -``` - -## 注意事项 -RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作会在转RKNN模型时,内嵌到模型中,因此我们在使用FastDeploy部署时,需要先调用DisableNormalizeAndPermute(C++)或`disable_normalize_and_permute(Python),在预处理阶段禁用归一化以及数据格式的转换。 - -- [模型介绍](../../) -- [Python部署](../python) -- [转换PPSeg RKNN模型文档](../README.md) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md index e212d4e2d..ede1656df 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/pp_humanseg.md @@ -1,5 +1,5 @@ [English](pp_humanseg_EN.md) | 简体中文 -# PPHumanSeg模型部署 +# PP-HumanSeg模型转换示例 ## 转换模型 下面以Portait-PP-HumanSegV2_Lite(肖像分割模型)为例子,教大家如何转换PaddleSeg模型到RKNN模型。 diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README.md index f5b99400f..5b7c3df35 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README.md @@ -1,36 +1,38 @@ -English | [简体中文](README_CN.md) -# PaddleSeg Deployment Examples for Python +[English](README.md) | 简体中文 +# PaddleSeg Python部署示例 -Before deployment, the following step need to be confirmed: +在部署前,需确认以下步骤 -- 1. Hardware and software environment meets the requirements, please refer to [Environment Requirements for FastDeploy](../../../../../../docs/en/build_and_install/rknpu2.md). +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md) -【Note】If you are deploying **PP-Matting**, **PP-HumanMatting** or **ModNet**, please refer to [Matting Model Deployment](../../../../matting/). +【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../../../matting/) -This directory provides `infer.py` for a quick example of PPHumanseg deployment on RKNPU. This can be done by running the following script. +本目录下提供`infer.py`快速完成PPHumanseg在RKNPU上部署的示例。执行如下脚本即可完成 ```bash -# Download the deploying demo code. +# 下载部署示例代码 git clone https://github.com/PaddlePaddle/FastDeploy.git cd FastDeploy/examples/vision/segmentation/paddleseg/python -# Download images. +# 下载图片 wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip unzip images.zip -# Inference. +# 推理 python3 infer.py --model_file ./Portrait_PP_HumanSegV2_Lite_256x144_infer/Portrait_PP_HumanSegV2_Lite_256x144_infer_rk3588.rknn \ --config_file ./Portrait_PP_HumanSegV2_Lite_256x144_infer/deploy.yaml \ --image images/portrait_heng.jpg ``` -## Notes -The input requirement for the model on RKNPU is to use NHWC format, and image normalization will be embedded into the model when converting the RKNN model, so we need to call DisableNormalizeAndPermute(C++) or disable_normalize_and_permute(Python) first when deploying with FastDeploy to disable normalization and data format conversion in the preprocessing stage. +## 注意事项 +RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作会在转RKNN模型时,内嵌到模型中,因此我们在使用FastDeploy部署时,需要先调用DisableNormalizeAndPermute(C++)或`disable_normalize_and_permute(Python),在预处理阶段禁用归一化以及数据格式的转换。 -## Other Documents +## 快速链接 -- [PaddleSeg Model Description](..) 
-- [PaddleSeg C++ Deployment](../cpp) -- [Description of the prediction](../../../../../../docs/api/vision_results/) -- [Convert PPSeg and RKNN model](../README.md) +- [FastDeploy部署PaddleSeg模型概览](..) +- [PaddleSeg C++部署](../cpp) +- [转换PaddleSeg模型至RKNN模型文档](../README_CN.md#准备paddleseg部署模型以及转换模型) + +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/vision_result_related_problems.md) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README_CN.md deleted file mode 100644 index 0bf8b9396..000000000 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README_CN.md +++ /dev/null @@ -1,36 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg Python部署示例 - -在部署前,需确认以下步骤 - -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md) - -【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../../../matting/) - -本目录下提供`infer.py`快速完成PPHumanseg在RKNPU上部署的示例。执行如下脚本即可完成 - -```bash -# 下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/python - -# 下载图片 -wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip -unzip images.zip - -# 推理 -python3 infer.py --model_file ./Portrait_PP_HumanSegV2_Lite_256x144_infer/Portrait_PP_HumanSegV2_Lite_256x144_infer_rk3588.rknn \ - --config_file ./Portrait_PP_HumanSegV2_Lite_256x144_infer/deploy.yaml \ - --image images/portrait_heng.jpg -``` - - -## 注意事项 -RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作会在转RKNN模型时,内嵌到模型中,因此我们在使用FastDeploy部署时,需要先调用DisableNormalizeAndPermute(C++)或`disable_normalize_and_permute(Python),在预处理阶段禁用归一化以及数据格式的转换。 - -## 其它文档 - -- [PaddleSeg 模型介绍](..) -- [PaddleSeg C++部署](../cpp) -- [模型预测结果说明](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/api/vision_results/segmentation_result_CN.md) -- [转换PaddleSeg模型至RKNN模型文档](../README.md) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md old mode 100755 new mode 100644 index 7e4620b18..5f92e7f6f --- a/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md @@ -1,12 +1,27 @@ -English | [简体中文](README_CN.md) -# Deployment of PP-LiteSeg Quantification Model on RV1126 -Now FastDeploy allows deploying PP-LiteSeg quantization model to RV1126 based on Paddle Lite. 
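A frequent follow-up to the RKNPU2 Python example above (and the FAQ it links) is turning the returned `SegmentationResult` into numpy arrays for further processing. The sketch below assumes the result exposes `shape`, `label_map` and, optionally, `score_map`/`contain_score_map`; treat the field names as assumptions if your FastDeploy version differs.

```python
import numpy as np

def seg_result_to_numpy(result):
    """Convert a FastDeploy SegmentationResult into numpy arrays."""
    h, w = result.shape                       # (height, width) of the mask
    label_map = np.array(result.label_map, dtype=np.int32).reshape(h, w)
    score_map = None
    if getattr(result, "contain_score_map", False):
        score_map = np.array(result.score_map, dtype=np.float32).reshape(h, w)
    return label_map, score_map
```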
+[English](README.md) | 简体中文 +# 在瑞芯微 RV1126 上使用 FastDeploy 部署 PaddleSeg 模型 +瑞芯微 RV1126 是一款编解码芯片,专门面相人工智能的机器视觉领域。目前,FastDeploy 支持在 RV1126 上基于 Paddle-Lite 部署 PaddleSeg 相关模型 -For model quantization and download of quantized models, refer to [Model Quantization](../quantize/README.md) +## 瑞芯微 RV1126 支持的PaddleSeg模型 +目前瑞芯微 RV1126 的 NPU 支持的量化模型如下: +## 预导出的推理模型 +为了方便开发者的测试,下面提供了PaddleSeg导出的部分量化后的推理模型,开发者可直接下载使用。 +| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | +| [PP-LiteSeg-T(STDC1)-cityscapes-without-argmax](https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz)| 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | +**注意** +- PaddleSeg量化模型包含`model.pdmodel`、`model.pdiparams`、`deploy.yaml`和`subgraph.txt`四个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息,subgraph.txt是为了异构计算而存储的配置文件 +- 若以上列表中无满足要求的模型,可参考下方教程自行导出适配A311D的模型 -## Detailed Deployment Tutorials +## PaddleSeg动态图模型导出为RV1126支持的INT8模型 +模型导出分为以下两步 +1. PaddleSeg训练的动态图模型导出为推理静态图模型,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) +瑞芯微RV1126仅支持INT8 +2. 将推理模型量化压缩为INT8模型,FastDeploy模型量化的方法及一键自动化压缩工具可以参考[模型量化](../../../quantize/README.md) -Only C++ deployment is supported on RV1126. +## 详细部署文档 -- [C++ Deployment](cpp) +目前,瑞芯微 RV1126 上只支持C++的部署。 + +- [C++部署](cpp) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rv1126/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README_CN.md deleted file mode 100644 index 2b51362b8..000000000 --- a/examples/vision/segmentation/paddleseg/rockchip/rv1126/README_CN.md +++ /dev/null @@ -1,20 +0,0 @@ -[English](README.md) | 简体中文 -# 在瑞芯微 RV1126 上使用 FastDeploy 部署 PaddleSeg 模型 -瑞芯微 RV1126 是一款编解码芯片,专门面相人工智能的机器视觉领域。目前,FastDeploy 支持在 RV1126 上基于 Paddle-Lite 部署 PaddleSeg 相关模型 - -## 瑞芯微 RV1126 支持的PaddleSeg模型 -由于瑞芯微 RV1126 的 NPU 仅支持 INT8 量化模型的部署,因此所支持的量化模型如下: -- [PP-LiteSeg 系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) - -为了方便开发者的测试,下面提供了 PaddleSeg 导出的部分模型,开发者可直接下载使用。 - -| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | -|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | -| [PP-LiteSeg-T(STDC1)-cityscapes-without-argmax](https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz)| 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | ->> **注意**: FastDeploy 模型量化的方法及一键自动化压缩工具可以参考[模型量化](../../../quantize/README.md) - -## 详细部署文档 - -目前,瑞芯微 RV1126 上只支持C++的部署。 - -- [C++部署](cpp) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README.md old mode 100755 new mode 100644 index 0022e6840..aacad9ab1 --- a/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README.md @@ -1,29 +1,27 @@ -English | [简体中文](README_CN.md) -# PP-LiteSeg Quantitative Model C++ Deployment Example +[English](README.md) | 简体中文 +# PP-LiteSeg 量化模型 C++ 部署示例 -`infer.cc` in this directory can help you quickly complete the inference acceleration of PP-LiteSeg quantization model deployment on RV1126. +本目录下提供的 `infer.cc`,可以帮助用户快速完成 PP-LiteSeg 量化模型在 RV1126 上的部署推理加速。 -## Deployment Preparations -### FastDeploy Cross-compile Environment Preparations -1. 
For the software and hardware environment, and the cross-compile environment, please refer to [Preparations for FastDeploy Cross-compile environment](../../../../../../docs/en/build_and_install/rv1126.md#Cross-compilation-environment-construction). +## 部署准备 +### FastDeploy 交叉编译环境准备 +软硬件环境满足要求,以及交叉编译环境的准备,请参考:[瑞芯微RV1126部署环境](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装) -### Model Preparations -1. You can directly use the quantized model provided by FastDeploy for deployment. -2. You can use one-click automatical compression tool provided by FastDeploy to quantize model by yourself, and use the generated quantized model for deployment.(Note: The quantized classification model still needs the deploy.yaml file in the FP32 model folder. Self-quantized model folder does not contain this yaml file, you can copy it from the FP32 model folder to the quantized model folder.) -3. The model requires heterogeneous computation. Please refer to: [Heterogeneous Computation](./../../../../../../docs/en/faq/heterogeneous_computing_on_timvx_npu.md). Since the model is already provided, you can test the heterogeneous file we provide first to verify whether the accuracy meets the requirements. +### 模型准备 +1. 用户可以直接使用由[FastDeploy 提供的量化模型](../README_CN.md#瑞芯微-rv1126-支持的paddleseg模型)进行部署。 +2. 若FastDeploy没有提供满足要求的量化模型,用户可以参考[PaddleSeg动态图模型导出为RV1126支持的INT8模型](../README_CN.md#paddleseg动态图模型导出为rv1126支持的int8模型)自行导出或训练量化模型 +3. 若上述导出或训练的模型出现精度下降或者报错,则需要使用异构计算,使得模型算子部分跑在RV1126的ARM CPU上进行调试以及精度验证,其中异构计算所需的文件是subgraph.txt。具体关于异构计算可参考:[异构计算](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md)。 -For more information, please refer to [Model Quantization](../../quantize/README.md). +## 在 RV1126 上部署量化后的 PP-LiteSeg 分割模型 +请按照以下步骤完成在 RV1126 上部署 PP-LiteSeg 量化模型: +1. 交叉编译编译 FastDeploy 库,具体请参考:[交叉编译 FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/a311d.md#基于-paddle-lite-的-fastdeploy-交叉编译库编译) -## Deploying the Quantized PP-LiteSeg Segmentation model on RV1126 -Please follow these steps to complete the deployment of the PP-LiteSeg quantization model on RV1126. -1. Cross-compile the FastDeploy library as described in [Cross-compile FastDeploy](../../../../../../docs/en/build_and_install/rv1126.md#FastDeploy-cross-compilation-library-compilation-based-on-Paddle-Lite). - -2. Copy the compiled library to the current directory. You can run this line: +2. 将编译后的库拷贝到当前目录,可使用如下命令: ```bash -cp -r FastDeploy/build/fastdeploy-timvx/ FastDeploy/examples/vision/segmentation/paddleseg/rv1126/cpp +cp -r FastDeploy/build/fastdeploy-timvx/ path/to/paddleseg/rockchip/rv1126/cpp ``` -3. Download the model and example images required for deployment in current path. +3. 在当前路径下载部署所需的模型和示例图片: ```bash mkdir models && mkdir images wget https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz @@ -33,25 +31,29 @@ wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png cp -r cityscapes_demo.png images ``` -4. Compile the deployment example. You can run the following lines: +4. 编译部署示例,可使入如下命令: ```bash mkdir build && cd build cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/../fastdeploy-timvx/toolchain.cmake -DFASTDEPLOY_INSTALL_DIR=${PWD}/../fastdeploy-timvx -DTARGET_ABI=armhf .. make -j8 make install -# After success, an install folder will be created with a running demo and libraries required for deployment. +# 成功编译之后,会生成 install 文件夹,里面有一个运行 demo 和部署所需的库 ``` -5. 
Deploy the PP-LiteSeg segmentation model to Rockchip RV1126 based on adb. You can run the following lines: +5. 基于 adb 工具部署 PP-LiteSeg 分割模型到 Rockchip RV1126,可使用如下命令: ```bash -# Go to the install directory. -cd FastDeploy/examples/vision/segmentation/paddleseg/rv1126/cpp/build/install/ -# The following line represents: bash run_with_adb.sh, demo needed to run, model path, image path, DEVICE ID. +# 进入 install 目录 +cd path/to/paddleseg/rockchip/rv1126/cpp/build/install/ +cp ../../run_with_adb.sh . +# 如下命令表示:bash run_with_adb.sh 需要运行的demo 模型路径 图片路径 设备的DEVICE_ID bash run_with_adb.sh infer_demo ppliteseg cityscapes_demo.png $DEVICE_ID ``` -The output is: +部署成功后运行结果如下: -Please note that the model deployed on RV1126 needs to be quantized. You can refer to [Model Quantization](../../../../../../docs/en/quantize.md). + +## 快速链接 +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](../../) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README_CN.md deleted file mode 100644 index afd185ca0..000000000 --- a/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/README_CN.md +++ /dev/null @@ -1,57 +0,0 @@ -[English](README.md) | 简体中文 -# PP-LiteSeg 量化模型 C++ 部署示例 - -本目录下提供的 `infer.cc`,可以帮助用户快速完成 PP-LiteSeg 量化模型在 RV1126 上的部署推理加速。 - -## 部署准备 -### FastDeploy 交叉编译环境准备 -1. 软硬件环境满足要求,以及交叉编译环境的准备,请参考:[FastDeploy 交叉编译环境准备](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/rv1126.md#交叉编译环境搭建) - -### 模型准备 -1. 用户可以直接使用由 FastDeploy 提供的量化模型进行部署。 -2. 用户可以使用 FastDeploy 提供的一键模型自动化压缩工具,自行进行模型量化, 并使用产出的量化模型进行部署.(注意: 推理量化后的分类模型仍然需要FP32模型文件夹下的 deploy.yaml 文件, 自行量化的模型文件夹内不包含此 yaml 文件, 用户从FP32模型文件夹下复制此yaml文件到量化后的模型文件夹内即可.) -3. 模型需要异构计算,异构计算文件可以参考:[异构计算](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md),由于 FastDeploy 已经提供了模型,可以先测试我们提供的异构文件,验证精度是否符合要求。 - -更多量化相关相关信息可查阅[模型量化](../../quantize/README.md) - -## 在 RV1126 上部署量化后的 PP-LiteSeg 分割模型 -请按照以下步骤完成在 RV1126 上部署 PP-LiteSeg 量化模型: -1. 交叉编译编译 FastDeploy 库,具体请参考:[交叉编译 FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/a311d.md#基于-paddle-lite-的-fastdeploy-交叉编译库编译) - -2. 将编译后的库拷贝到当前目录,可使用如下命令: -```bash -cp -r FastDeploy/build/fastdeploy-timvx/ FastDeploy/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp -``` - -3. 在当前路径下载部署所需的模型和示例图片: -```bash -mkdir models && mkdir images -wget https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz -tar -xvf ppliteseg.tar.gz -cp -r ppliteseg models -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png -cp -r cityscapes_demo.png images -``` - -4. 编译部署示例,可使入如下命令: -```bash -mkdir build && cd build -cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/../fastdeploy-timvx/toolchain.cmake -DFASTDEPLOY_INSTALL_DIR=${PWD}/../fastdeploy-timvx -DTARGET_ABI=armhf .. -make -j8 -make install -# 成功编译之后,会生成 install 文件夹,里面有一个运行 demo 和部署所需的库 -``` - -5. 
基于 adb 工具部署 PP-LiteSeg 分割模型到 Rockchip RV1126,可使用如下命令: -```bash -# 进入 install 目录 -cd FastDeploy/examples/vision/segmentation/paddleseg/rockchip/rv1126/cpp/build/install/ -# 如下命令表示:bash run_with_adb.sh 需要运行的demo 模型路径 图片路径 设备的DEVICE_ID -bash run_with_adb.sh infer_demo ppliteseg cityscapes_demo.png $DEVICE_ID -``` - -部署成功后运行结果如下: - - - -需要特别注意的是,在 RV1126 上部署的模型需要是量化后的模型,模型的量化请参考:[模型量化](../../../quantize/README.md) diff --git a/examples/vision/segmentation/paddleseg/serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/README_CN.md index 0c2e5194b..ea1599432 100644 --- a/examples/vision/segmentation/paddleseg/serving/README_CN.md +++ b/examples/vision/segmentation/paddleseg/serving/README_CN.md @@ -1,7 +1,9 @@ [English](README.md) | 简体中文 # 使用 FastDeploy 服务化部署 PaddleSeg 模型 ## FastDeploy 服务化部署介绍 -在线推理作为企业或个人线上部署模型的最后一环,是工业界必不可少的环节,其中最重要的就是服务化推理框架。FastDeploy 目前提供两种服务化部署方式:simple_serving和fastdeploy_serving。simple_serving 基于Flask框架具有简单高效的特点,可以快速验证线上部署模型的可行性。fastdeploy_serving基于Triton Inference Server框架,是一套完备且性能卓越的服务化部署框架,可用于实际生产。 +在线推理作为企业或个人线上部署模型的最后一环,是工业界必不可少的环节,其中最重要的就是服务化推理框架。FastDeploy 目前提供两种服务化部署方式:simple_serving和fastdeploy_serving +- simple_serving基于Flask框架具有简单高效的特点,可以快速验证线上部署模型的可行性。 +- fastdeploy_serving基于Triton Inference Server框架,是一套完备且性能卓越的服务化部署框架,可用于实际生产。 ## 详细部署文档 diff --git a/examples/vision/segmentation/paddleseg/sophgo/README.md b/examples/vision/segmentation/paddleseg/sophgo/README.md index 337507b49..1c08a5b7a 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/README.md +++ b/examples/vision/segmentation/paddleseg/sophgo/README.md @@ -1,41 +1,51 @@ -English | [简体中文](README_CN.md) -# PaddleSeg C++ Deployment Example +[English](README.md) | 简体中文 +# PaddleSeg C++部署示例 -## Supporting Model List +## 支持模型列表 -- PP-LiteSeg deployment models are from [PaddleSeg PP-LiteSeg series model](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/pp_liteseg/README.md). +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) -## PP-LiteSeg Model Deployment and Conversion Preparations +## 预导出的推理模型 -Befor SOPHGO-TPU model deployment, you should first convert Paddle model to bmodel model. Specific steps are as follows: -- Download Paddle model: [PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz). -- Convert Paddle model to ONNX model. Please refer to [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX). -- For the process of converting ONNX model to bmodel, please refer to [TPU-MLIR](https://github.com/sophgo/tpu-mlir). +为了方便开发者的测试,下面提供了PaddleSeg导出的部分推理模型,开发者可直接下载使用。 -## Model Converting Example +PaddleSeg训练模型导出为推理模型,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) -Here we take [PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz) as an example to show you how to convert Paddle model to SOPHGO-TPU model. 
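The conversion walkthrough below fixes the ONNX input shape to `[1, 3, 512, 512]` before handing the model to TPU-MLIR. It is worth confirming that step succeeded before compiling the bmodel; a small check, assuming the `onnx` Python package is installed and using the file name produced by the paddle2onnx command shown below:

```python
import onnx

# pp_liteseg.onnx is the file produced by the paddle2onnx step below.
model = onnx.load("pp_liteseg.onnx")
onnx.checker.check_model(model)

for inp in model.graph.input:
    dims = [d.dim_value for d in inp.type.tensor_type.shape.dim]
    print(inp.name, dims)  # expected: x [1, 3, 512, 512]
```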
+| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | +| [PP-LiteSeg-T(STDC1)-cityscapes-without-argmax](https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz)| 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | -### Download PP-LiteSeg-B(STDC2)-cityscapes-without-argmax, and convert it to ONNX +## 将PaddleSeg推理模型转换为bmodel模型步骤 + +SOPHGO-TPU部署模型前需要将Paddle模型转换成bmodel模型,具体步骤如下: +- 下载Paddle模型[PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz) +- Paddle模型转换为ONNX模型,请参考[Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) +- ONNX模型转换bmodel模型的过程,请参考[TPU-MLIR](https://github.com/sophgo/tpu-mlir) + +## bmode模型转换example + +下面以[PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz)为例,教大家如何转换Paddle模型到SOPHGO-TPU支持的bmodel模型 + +### 下载PP-LiteSeg-B(STDC2)-cityscapes-without-argmax模型,并转换为ONNX模型 ```shell -# Download Paddle2ONNX repository. +# 下载Paddle2ONNX仓库 git clone https://github.com/PaddlePaddle/Paddle2ONNX -# Download the Paddle static map model and fix the input shape. -## Go to the directory where the input shape is fixed for the Paddle static map model. +# 下载Paddle静态图模型并为Paddle静态图模型固定输入shape +## 进入为Paddle静态图模型固定输入shape的目录 cd Paddle2ONNX/tools/paddle wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz tar xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -# Modify the input shape of PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer model from dynamic input to constant input. +# 修改PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer模型的输入shape,由动态输入变成固定输入 python paddle_infer_shape.py --model_dir PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer \ --model_filename model.pdmodel \ --params_filename model.pdiparams \ --save_dir pp_liteseg_fix \ --input_shape_dict="{'x':[1,3,512,512]}" -# Convert constant input Paddle model to ONNX model. +#将固定输入的Paddle模型转换成ONNX模型 paddle2onnx --model_dir pp_liteseg_fix \ --model_filename model.pdmodel \ --params_filename model.pdiparams \ @@ -43,32 +53,32 @@ paddle2onnx --model_dir pp_liteseg_fix \ --enable_dev_version True ``` -### Export bmodel +### 导出bmodel模型 -Take converting BM1684x model to bmodel as an example. You need to download [TPU-MLIR](https://github.com/sophgo/tpu-mlir) project. For the process of installation, please refer to [TPU-MLIR Document](https://github.com/sophgo/tpu-mlir/blob/master/README.md). -### 1. Installation +以转换BM1684x的bmodel模型为例子,我们需要下载[TPU-MLIR](https://github.com/sophgo/tpu-mlir)工程,安装过程具体参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。 +#### 1. 安装 ``` shell docker pull sophgo/tpuc_dev:latest -# myname1234 is just an example, you can customize your own name. +# myname1234是一个示例,也可以设置其他名字 docker run --privileged --name myname1234 -v $PWD:/workspace -it sophgo/tpuc_dev:latest source ./envsetup.sh ./build.sh ``` -### 2. Convert ONNX model to bmodel +#### 2. ONNX模型转换为bmodel模型 ``` shell mkdir pp_liteseg && cd pp_liteseg -# Put the test image in this file, and put the converted pp_liteseg.onnx into this folder. +#在该文件中放入测试图片,同时将上一步转换的pp_liteseg.onnx放入该文件夹中 cp -rf ${REGRESSION_PATH}/dataset/COCO2017 . cp -rf ${REGRESSION_PATH}/image . -# Put in the onnx model file pp_liteseg.onnx. 
+#放入onnx模型文件pp_liteseg.onnx mkdir workspace && cd workspace -# Convert ONNX model to mlir model, the parameter --output_names can be viewed via NETRON. +#将ONNX模型转换为mlir模型,其中参数--output_names可以通过NETRON查看 model_transform.py \ --model_name pp_liteseg \ --model_def ../pp_liteseg.onnx \ @@ -82,7 +92,7 @@ model_transform.py \ --test_result pp_liteseg_top_outputs.npz \ --mlir pp_liteseg.mlir -# Convert mlir model to BM1684x F32 bmodel. +#将mlir模型转换为BM1684x的F32 bmodel模型 model_deploy.py \ --mlir pp_liteseg.mlir \ --quantize F32 \ @@ -91,7 +101,8 @@ model_deploy.py \ --test_reference pp_liteseg_top_outputs.npz \ --model pp_liteseg_1684x_f32.bmodel ``` -The final bmodel, pp_liteseg_1684x_f32.bmodel, can run on BM1684x. If you want to further accelerate the model, you can convert ONNX model to INT8 bmodel. For details, please refer to [TPU-MLIR Document](https://github.com/sophgo/tpu-mlir/blob/master/README.md). +最终获得可以在BM1684x上能够运行的bmodel模型pp_liteseg_1684x_f32.bmodel。如果需要进一步对模型进行加速,可以将ONNX模型转换为INT8 bmodel,具体步骤参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。 -## Other Documents -- [Cpp Deployment](./cpp) +## 快速链接 +- [Cpp部署](./cpp) +- [Python部署](./python) diff --git a/examples/vision/segmentation/paddleseg/sophgo/README_CN.md b/examples/vision/segmentation/paddleseg/sophgo/README_CN.md deleted file mode 100644 index 563507c25..000000000 --- a/examples/vision/segmentation/paddleseg/sophgo/README_CN.md +++ /dev/null @@ -1,106 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg C++部署示例 - -## 支持模型列表 - -- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) - -为了方便开发者的测试,下面提供了PaddleSeg导出的部分推理模型,开发者可直接下载使用。 - -PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) - -| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | -|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | -| [PP-LiteSeg-T(STDC1)-cityscapes-without-argmax](https://bj.bcebos.com/fastdeploy/models/rk1/ppliteseg.tar.gz)| 31MB | 1024x512 | 77.04% | 77.73% | 77.46% | - -## 准备PP-LiteSeg部署模型以及转换模型 - -SOPHGO-TPU部署模型前需要将Paddle模型转换成bmodel模型,具体步骤如下: -- 下载Paddle模型[PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz) -- Paddle模型转换为ONNX模型,请参考[Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) -- ONNX模型转换bmodel模型的过程,请参考[TPU-MLIR](https://github.com/sophgo/tpu-mlir) - -## 模型转换example - -下面以[PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz)为例子,教大家如何转换Paddle模型到SOPHGO-TPU模型 - -### 下载PP-LiteSeg-B(STDC2)-cityscapes-without-argmax模型,并转换为ONNX模型 -```shell -# 下载Paddle2ONNX仓库 -git clone https://github.com/PaddlePaddle/Paddle2ONNX - -# 下载Paddle静态图模型并为Paddle静态图模型固定输入shape -## 进入为Paddle静态图模型固定输入shape的目录 -cd Paddle2ONNX/tools/paddle - -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz -tar xvf PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz - -# 修改PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer模型的输入shape,由动态输入变成固定输入 -python paddle_infer_shape.py --model_dir PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer \ - --model_filename model.pdmodel \ - --params_filename model.pdiparams \ - --save_dir pp_liteseg_fix \ - --input_shape_dict="{'x':[1,3,512,512]}" - -#将固定输入的Paddle模型转换成ONNX模型 -paddle2onnx --model_dir pp_liteseg_fix \ - 
--model_filename model.pdmodel \ - --params_filename model.pdiparams \ - --save_file pp_liteseg.onnx \ - --enable_dev_version True -``` - -### 导出bmodel模型 - -以转换BM1684x的bmodel模型为例子,我们需要下载[TPU-MLIR](https://github.com/sophgo/tpu-mlir)工程,安装过程具体参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。 -### 1. 安装 -``` shell -docker pull sophgo/tpuc_dev:latest - -# myname1234是一个示例,也可以设置其他名字 -docker run --privileged --name myname1234 -v $PWD:/workspace -it sophgo/tpuc_dev:latest - -source ./envsetup.sh -./build.sh -``` - -### 2. ONNX模型转换为bmodel模型 -``` shell -mkdir pp_liteseg && cd pp_liteseg - -#在该文件中放入测试图片,同时将上一步转换的pp_liteseg.onnx放入该文件夹中 -cp -rf ${REGRESSION_PATH}/dataset/COCO2017 . -cp -rf ${REGRESSION_PATH}/image . -#放入onnx模型文件pp_liteseg.onnx - -mkdir workspace && cd workspace - -#将ONNX模型转换为mlir模型,其中参数--output_names可以通过NETRON查看 -model_transform.py \ - --model_name pp_liteseg \ - --model_def ../pp_liteseg.onnx \ - --input_shapes [[1,3,512,512]] \ - --mean 0.0,0.0,0.0 \ - --scale 0.0039216,0.0039216,0.0039216 \ - --keep_aspect_ratio \ - --pixel_format rgb \ - --output_names bilinear_interp_v2_6.tmp_0 \ - --test_input ../image/dog.jpg \ - --test_result pp_liteseg_top_outputs.npz \ - --mlir pp_liteseg.mlir - -#将mlir模型转换为BM1684x的F32 bmodel模型 -model_deploy.py \ - --mlir pp_liteseg.mlir \ - --quantize F32 \ - --chip bm1684x \ - --test_input pp_liteseg_in_f32.npz \ - --test_reference pp_liteseg_top_outputs.npz \ - --model pp_liteseg_1684x_f32.bmodel -``` -最终获得可以在BM1684x上能够运行的bmodel模型pp_liteseg_1684x_f32.bmodel。如果需要进一步对模型进行加速,可以将ONNX模型转换为INT8 bmodel,具体步骤参见[TPU-MLIR文档](https://github.com/sophgo/tpu-mlir/blob/master/README.md)。 - -## 快速链接 -- [Cpp部署](./cpp) -- [Python部署](./python) diff --git a/examples/vision/segmentation/paddleseg/sophgo/cpp/README.md b/examples/vision/segmentation/paddleseg/sophgo/cpp/README.md index eae65d559..3f33c398c 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/cpp/README.md +++ b/examples/vision/segmentation/paddleseg/sophgo/cpp/README.md @@ -1,57 +1,56 @@ -English | [简体中文](README_CN.md) -# PaddleSeg C++ Deployment Example +[English](README.md) | 简体中文 +# PaddleSeg C++部署示例 -`infer.cc` in this directory provides a quick example of accelerated deployment of the pp_liteseg model on SOPHGO BM1684x. +本目录下提供`infer.cc`快速完成PP-LiteSeg在SOPHGO BM1684x板子上加速部署的示例。 -Before deployment, the following two steps need to be confirmed: +## 算能硬件编译FastDeploy环境准备 +在部署前,需自行编译基于算能硬件的预测库,参考文档[算能硬件部署环境](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#算能硬件部署环境) -1. Hardware and software environment meets the requirements. -2. Compile the FastDeploy repository from scratch according to the development environment. +## 生成基本目录文件 -For the above steps, please refer to [How to Build SOPHGO Deployment Environment](../../../../../../docs/en/build_and_install/sophgo.md). - -## Generate Basic Directory Files - -The routine consists of the following parts: +该例程由以下几个部分组成 ```text . ├── CMakeLists.txt -├── build # Compile Folder -├── image # Folder for images +├── fastdeploy-sophgo # 编译文件夹 +├── image # 存放图片的文件夹 ├── infer.cc -└── model # Folder for models +└── model # 存放模型文件的文件夹 ``` -## Compile +## 编译 -### Compile and Copy SDK to folder thirdpartys +### 编译FastDeploy -Please refer to [How to Build SOPHGO Deployment Environment](../../../../../../docs/en/build_and_install/sophgo.md) to compile SDK.After compiling, the fastdeploy-0.0.3 directory will be created in the build directory. 
+请参考[SOPHGO部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/sophgo.md)编译SDK,编译完成后,将在build目录下生成fastdeploy-sophgo目录。拷贝fastdeploy-sophgo至当前目录 -### Copy model and configuration files to folder Model -Convert Paddle model to SOPHGO bmodel model. For the conversion steps, please refer to [Document](../README.md). -Please copy the converted SOPHGO bmodel to folder model. +### 拷贝模型文件,以及配置文件至model文件夹 +将Paddle模型转换为SOPHGO bmodel模型,转换步骤参考[文档](../README_CN.md#将paddleseg推理模型转换为bmodel模型步骤) -### Prepare Test Images to folder image +将转换后的SOPHGO bmodel模型文件拷贝至model中 + +### 准备测试图片至image文件夹 ```bash wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png cp cityscapes_demo.png ./images ``` -### Compile example +### 编译example ```bash cd build -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-0.0.3 +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-sophgo make ``` -## Running Routines +## 运行例程 ```bash ./infer_demo model images/cityscapes_demo.png ``` - -- [Model Description](../../) -- [Model Conversion](../) +## 快速链接 +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](../../) +- [Python部署](../python) +- [模型转换](../README_CN.md#将paddleseg推理模型转换为bmodel模型步骤) diff --git a/examples/vision/segmentation/paddleseg/sophgo/cpp/README_CN.md b/examples/vision/segmentation/paddleseg/sophgo/cpp/README_CN.md deleted file mode 100644 index fbb274b15..000000000 --- a/examples/vision/segmentation/paddleseg/sophgo/cpp/README_CN.md +++ /dev/null @@ -1,57 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg C++部署示例 - -本目录下提供`infer.cc`快速完成pp_liteseg模型在SOPHGO BM1684x板子上加速部署的示例。 - -在部署前,需确认以下两个步骤: - -1. 软硬件环境满足要求 -2. 根据开发环境,从头编译FastDeploy仓库 - -以上步骤请参考[SOPHGO部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/sophgo.md)实现 - -## 生成基本目录文件 - -该例程由以下几个部分组成 -```text -. -├── CMakeLists.txt -├── build # 编译文件夹 -├── image # 存放图片的文件夹 -├── infer.cc -└── model # 存放模型文件的文件夹 -``` - -## 编译 - -### 编译并拷贝SDK到thirdpartys文件夹 - -请参考[SOPHGO部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/sophgo.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-0.0.3目录. - -### 拷贝模型文件,以及配置文件至model文件夹 -将Paddle模型转换为SOPHGO bmodel模型,转换步骤参考[文档](../README.md) -将转换后的SOPHGO bmodel模型文件拷贝至model中 - -### 准备测试图片至image文件夹 -```bash -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png -cp cityscapes_demo.png ./images -``` - -### 编译example - -```bash -cd build -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-0.0.3 -make -``` - -## 运行例程 - -```bash -./infer_demo model images/cityscapes_demo.png -``` - - -- [模型介绍](../../) -- [模型转换](../) diff --git a/examples/vision/segmentation/paddleseg/sophgo/python/README.md b/examples/vision/segmentation/paddleseg/sophgo/python/README.md index 5aba6590f..e646d6a90 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/python/README.md +++ b/examples/vision/segmentation/paddleseg/sophgo/python/README.md @@ -1,27 +1,33 @@ -English | [简体中文](README_CN.md) -# PaddleSeg Python Deployment Example +[English](README.md) | 简体中文 +# PaddleSeg Python部署示例 -Before deployment, the following step need to be confirmed: +## 算能硬件编译FastDeploy wheel包环境准备 -- 1. Hardware and software environment meets the requirements. Please refer to [FastDeploy Environment Requirement](../../../../../../docs/en/build_and_install/sophgo.md). 
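The `infer.py` invocation shown below reduces to a handful of FastDeploy calls. A simplified sketch of what it does on the SOPHGO TPU, assuming a wheel built with SOPHGO support (`use_sophgo()` / `ModelFormat.SOPHGO` are assumptions of such a build) and the bmodel exported as described in the parent directory (paths are illustrative):

```python
import cv2
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_sophgo()  # assumed to exist in a SOPHGO-enabled build

model = fd.vision.segmentation.PaddleSegModel(
    "./bmodel/pp_liteseg_1684x_f32.bmodel",
    "",                       # bmodel bundles weights, no separate params file
    "./bmodel/deploy.yaml",
    runtime_option=option,
    model_format=fd.ModelFormat.SOPHGO)

im = cv2.imread("cityscapes_demo.png")
result = model.predict(im)

# Overlay the mask on the input, matching the sophgo_img.png output mentioned below.
vis = fd.vision.vis_segmentation(im, result, weight=0.5)
cv2.imwrite("sophgo_img.png", vis)
```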
+在部署前,需自行编译基于算能硬件的FastDeploy python wheel包并安装,参考文档[算能硬件部署环境](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#算能硬件部署环境) -`infer.py` in this directory provides a quick example of deployment of the pp_liteseg model on SOPHGO TPU. Please run the following script: +本目录下提供`infer.py`快速完成 pp_liteseg 在SOPHGO TPU上部署的示例。执行如下脚本即可完成 ```bash -# Download the sample deployment code. +# 下载部署示例代码 git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/sophgo/python +cd path/to/paddleseg/sophgo/python -# Download images. +# 下载图片 wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png -# Inference. +# PaddleSeg模型转换为bmodel模型 +将Paddle模型转换为SOPHGO bmodel模型,转换步骤参考[文档](../README_CN.md#将paddleseg推理模型转换为bmodel模型步骤) + +# 推理 python3 infer.py --model_file ./bmodel/pp_liteseg_1684x_f32.bmodel --config_file ./bmodel/deploy.yaml --image cityscapes_demo.png -# The returned result. -The result is saved as sophgo_img.png. +# 运行完成后返回结果如下所示 +运行结果保存在sophgo_img.png中 ``` -## Other Documents -- [pp_liteseg C++ Deployment](../cpp) -- [Converting pp_liteseg SOPHGO model](../README.md) +## 快速链接 +- [pp_liteseg C++部署](../cpp) +- [转换 pp_liteseg SOPHGO模型文档](../README_CN.md#导出bmodel模型) + +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/vision_result_related_problems.md) diff --git a/examples/vision/segmentation/paddleseg/sophgo/python/README_CN.md b/examples/vision/segmentation/paddleseg/sophgo/python/README_CN.md deleted file mode 100644 index a6eb37f8f..000000000 --- a/examples/vision/segmentation/paddleseg/sophgo/python/README_CN.md +++ /dev/null @@ -1,27 +0,0 @@ -[English](README.md) | 简体中文 -# PaddleSeg Python部署示例 - -在部署前,需确认以下步骤 - -- 1. 
软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/sophgo.md) - -本目录下提供`infer.py`快速完成 pp_liteseg 在SOPHGO TPU上部署的示例。执行如下脚本即可完成 - -```bash -# 下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/segmentation/paddleseg/sophgo/python - -# 下载图片 -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - -# 推理 -python3 infer.py --model_file ./bmodel/pp_liteseg_1684x_f32.bmodel --config_file ./bmodel/deploy.yaml --image cityscapes_demo.png - -# 运行完成后返回结果如下所示 -运行结果保存在sophgo_img.png中 -``` - -## 其它文档 -- [pp_liteseg C++部署](../cpp) -- [转换 pp_liteseg SOPHGO模型文档](../README.md) From eb1992d1a6e24966eb2b159ad6724791baf24ee7 Mon Sep 17 00:00:00 2001 From: huangjianhui <852142024@qq.com> Date: Thu, 9 Feb 2023 00:36:05 +0800 Subject: [PATCH 06/41] Update ppmatting --- examples/vision/matting/ppmatting | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/vision/matting/ppmatting b/examples/vision/matting/ppmatting index c89893416..c64ba0454 120000 --- a/examples/vision/matting/ppmatting +++ b/examples/vision/matting/ppmatting @@ -1 +1 @@ -/huangjianhui/doc/FastDeploy/examples/vision/segmentation/ppmatting/ \ No newline at end of file +../segmentation/ppmatting/ From b6b8835739136d6ca1be06d61e5b761f7b573f12 Mon Sep 17 00:00:00 2001 From: huangjianhui <852142024@qq.com> Date: Thu, 9 Feb 2023 00:36:54 +0800 Subject: [PATCH 07/41] Update README.md --- examples/vision/segmentation/paddleseg/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/vision/segmentation/paddleseg/README.md b/examples/vision/segmentation/paddleseg/README.md index a1947ae56..c240e41b7 100644 --- a/examples/vision/segmentation/paddleseg/README.md +++ b/examples/vision/segmentation/paddleseg/README.md @@ -1,4 +1,4 @@ -# PaddleSeg高性能全场景模型部署方案——FastDeploy +# PaddleSeg高性能全场景模型部署方案—FastDeploy ## FastDeploy介绍 From 14352eacdf88eeba2fee170567ca9420e6d214c9 Mon Sep 17 00:00:00 2001 From: huangjianhui <852142024@qq.com> Date: Thu, 9 Feb 2023 00:38:38 +0800 Subject: [PATCH 08/41] Update README.md --- .../vision/segmentation/paddleseg/rockchip/rknpu2/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md index f6d1a0b12..21a9b92ba 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md @@ -53,7 +53,7 @@ RKNPU部署模型前需要将Paddle模型转换成RKNN模型,具体步骤如 上述步骤可参考以下具体示例 -## 模型转换example +## 模型转换示例 * [PP-HumanSeg](./pp_humanseg.md) From c47a1c81aa14b8ad560155ac1a94b8474b6fbccf Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 8 Feb 2023 16:50:08 +0000 Subject: [PATCH 09/41] Update soft link --- examples/vision/matting/ppmatting | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/vision/matting/ppmatting b/examples/vision/matting/ppmatting index c64ba0454..352f37bf2 120000 --- a/examples/vision/matting/ppmatting +++ b/examples/vision/matting/ppmatting @@ -1 +1 @@ -../segmentation/ppmatting/ +../segmentation/ppmatting/ \ No newline at end of file From 09929d9d28777b0a0c23627164ece23d49a50cde Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 8 Feb 2023 17:08:33 +0000 Subject: [PATCH 10/41] Update docs --- .../vision/segmentation/ppmatting/README.md | 42 +++++ .../segmentation/ppmatting/README_CN.md | 43 
+++++ .../segmentation/ppmatting/cpp/CMakeLists.txt | 14 ++ .../segmentation/ppmatting/cpp/README.md | 93 ++++++++++ .../segmentation/ppmatting/cpp/README_CN.md | 94 ++++++++++ .../segmentation/ppmatting/cpp/infer.cc | 170 ++++++++++++++++++ .../segmentation/ppmatting/python/README.md | 81 +++++++++ .../ppmatting/python/README_CN.md | 80 +++++++++ .../segmentation/ppmatting/python/infer.py | 70 ++++++++ 9 files changed, 687 insertions(+) create mode 100644 examples/vision/segmentation/ppmatting/README.md create mode 100644 examples/vision/segmentation/ppmatting/README_CN.md create mode 100644 examples/vision/segmentation/ppmatting/cpp/CMakeLists.txt create mode 100755 examples/vision/segmentation/ppmatting/cpp/README.md create mode 100644 examples/vision/segmentation/ppmatting/cpp/README_CN.md create mode 100644 examples/vision/segmentation/ppmatting/cpp/infer.cc create mode 100755 examples/vision/segmentation/ppmatting/python/README.md create mode 100644 examples/vision/segmentation/ppmatting/python/README_CN.md create mode 100755 examples/vision/segmentation/ppmatting/python/infer.py diff --git a/examples/vision/segmentation/ppmatting/README.md b/examples/vision/segmentation/ppmatting/README.md new file mode 100644 index 000000000..a2cbdc346 --- /dev/null +++ b/examples/vision/segmentation/ppmatting/README.md @@ -0,0 +1,42 @@ +English | [简体中文](README_CN.md) +# PP-Matting Model Deployment + +## Model Description + +- [PP-Matting Release/2.6](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) + +## List of Supported Models + +Now FastDeploy supports the deployment of the following models + +- [PP-Matting models](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) +- [PP-HumanMatting models](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) +- [ModNet models](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) + + +## Export Deployment Model + +Before deployment, PP-Matting needs to be exported into the deployment model. Refer to [Export Model](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) for more information. (Tips: You need to set the `--input_shape` parameter of the export script when exporting PP-Matting and PP-HumanMatting models) + + +## Download Pre-trained Models + +For developers' testing, models exported by PP-Matting are provided below. Developers can download and use them directly. + +The accuracy metric is sourced from the model description in PP-Matting. (Accuracy data are not provided) Refer to the introduction in PP-Matting for more details. 
+ +| Model | Parameter Size | Accuracy | Note | +|:---------------------------------------------------------------- |:----- |:----- | :------ | +| [PP-Matting-512](https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz) | 106MB | - | +| [PP-Matting-1024](https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-1024.tgz) | 106MB | - | +| [PP-HumanMatting](https://bj.bcebos.com/paddlehub/fastdeploy/PPHumanMatting.tgz) | 247MB | - | +| [Modnet-ResNet50_vd](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_ResNet50_vd.tgz) | 355MB | - | +| [Modnet-MobileNetV2](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_MobileNetV2.tgz) | 28MB | - | +| [Modnet-HRNet_w18](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_HRNet_w18.tgz) | 51MB | - | + + + +## Detailed Deployment Tutorials + +- [Python Deployment](python) +- [C++ Deployment](cpp) diff --git a/examples/vision/segmentation/ppmatting/README_CN.md b/examples/vision/segmentation/ppmatting/README_CN.md new file mode 100644 index 000000000..a1c9801aa --- /dev/null +++ b/examples/vision/segmentation/ppmatting/README_CN.md @@ -0,0 +1,43 @@ +[English](README.md) | 简体中文 +# PP-Matting模型部署 + +## 模型版本说明 + +- [PP-Matting Release/2.6](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) + +## 支持模型列表 + +目前FastDeploy支持如下模型的部署 + +- [PP-Matting系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) +- [PP-HumanMatting系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) +- [ModNet系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) + + +## 导出部署模型 + +在部署前,需要先将PP-Matting导出成部署模型,导出步骤参考文档[导出模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting)(Tips:导出PP-Matting系列模型和PP-HumanMatting系列模型需要设置导出脚本的`--input_shape`参数) + + +## 下载预训练模型 + +为了方便开发者的测试,下面提供了PP-Matting导出的各系列模型,开发者可直接下载使用。 + +其中精度指标来源于PP-Matting中对各模型的介绍(未提供精度数据),详情各参考PP-Matting中的说明。 + + +| 模型 | 参数大小 | 精度 | 备注 | +|:---------------------------------------------------------------- |:----- |:----- | :------ | +| [PP-Matting-512](https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz) | 106MB | - | +| [PP-Matting-1024](https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-1024.tgz) | 106MB | - | +| [PP-HumanMatting](https://bj.bcebos.com/paddlehub/fastdeploy/PPHumanMatting.tgz) | 247MB | - | +| [Modnet-ResNet50_vd](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_ResNet50_vd.tgz) | 355MB | - | +| [Modnet-MobileNetV2](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_MobileNetV2.tgz) | 28MB | - | +| [Modnet-HRNet_w18](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_HRNet_w18.tgz) | 51MB | - | + + + +## 详细部署文档 + +- [Python部署](python) +- [C++部署](cpp) diff --git a/examples/vision/segmentation/ppmatting/cpp/CMakeLists.txt b/examples/vision/segmentation/ppmatting/cpp/CMakeLists.txt new file mode 100644 index 000000000..93540a7e8 --- /dev/null +++ b/examples/vision/segmentation/ppmatting/cpp/CMakeLists.txt @@ -0,0 +1,14 @@ +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc) +# 添加FastDeploy库依赖 +target_link_libraries(infer_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/segmentation/ppmatting/cpp/README.md b/examples/vision/segmentation/ppmatting/cpp/README.md new file mode 100755 index 000000000..f678fabd4 --- 
/dev/null +++ b/examples/vision/segmentation/ppmatting/cpp/README.md @@ -0,0 +1,93 @@ +English | [简体中文](README_CN.md) +# PP-Matting C++ Deployment Example + +This directory provides examples that `infer.cc` fast finishes the deployment of PP-Matting on CPU/GPU and GPU accelerated by TensorRT. +Before deployment, two steps require confirmation + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) +- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) + +Taking the PP-Matting inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required to support this model. + +```bash +mkdir build +cd build +# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# Download PP-Matting model files and test images +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz +tar -xvf PP-Matting-512.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg + + +# CPU inference +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 0 +# GPU inference +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 1 +# TensorRT inference on GPU +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 2 +# kunlunxin XPU inference +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 3 +``` + +The visualized result after running is as follows +
+ + + + +
+ +The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to: +- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/en/faq/use_sdk_on_windows.md) + +## PP-Matting C++ Interface + +### PPMatting Class + +```c++ +fastdeploy::vision::matting::PPMatting( + const string& model_file, + const string& params_file = "", + const string& config_file, + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) +``` + +PP-Matting model loading and initialization, among which model_file is the exported Paddle model format. + +**Parameter** + +> * **model_file**(str): Model file path +> * **params_file**(str): Parameter file path +> * **config_file**(str): Inference deployment configuration file +> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration +> * **model_format**(ModelFormat): Model format. Paddle format by default + +#### Predict Function + +> ```c++ +> PPMatting::Predict(cv::Mat* im, MattingResult* result) +> ``` +> +> Model prediction interface. Input images and output detection results. +> +> **Parameter** +> +> > * **im**: Input images in HWC or BGR format +> > * **result**: The segmentation result, including the predicted label of the segmentation and the corresponding probability of the label. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of SegmentationResult + +### Class Member Variable +#### Pre-processing Parameter +Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results + + +- [Model Description](../../) +- [Python Deployment](../python) +- [Vision Model Prediction Results](../../../../../docs/api/vision_results/) +- [How to switch the model inference backend engine](../../../../../docs/en/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/ppmatting/cpp/README_CN.md b/examples/vision/segmentation/ppmatting/cpp/README_CN.md new file mode 100644 index 000000000..38e2e592a --- /dev/null +++ b/examples/vision/segmentation/ppmatting/cpp/README_CN.md @@ -0,0 +1,94 @@ +[English](README.md) | 简体中文 +# PP-Matting C++部署示例 + +本目录下提供`infer.cc`快速完成PP-Matting在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +以Linux上 PP-Matting 推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本0.7.0以上(x.x.x>=0.7.0) + +```bash +mkdir build +cd build +# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载PP-Matting模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz +tar -xvf PP-Matting-512.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg + + +# CPU推理 +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 0 +# GPU推理 +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 1 +# GPU上TensorRT推理 +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 2 +# 昆仑芯XPU推理 +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 3 +``` + +运行完成可视化结果如下图所示 +
+ + + + +
+ +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +## PP-Matting C++接口 + +### PPMatting类 + +```c++ +fastdeploy::vision::matting::PPMatting( + const string& model_file, + const string& params_file = "", + const string& config_file, + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) +``` + +PP-Matting模型加载和初始化,其中model_file为导出的Paddle模型格式。 + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **config_file**(str): 推理部署配置文件 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + +#### Predict函数 + +> ```c++ +> PPMatting::Predict(cv::Mat* im, MattingResult* result) +> ``` +> +> 模型预测接口,输入图像直接输出检测结果。 +> +> **参数** +> +> > * **im**: 输入图像,注意需为HWC,BGR格式 +> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, MattingResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/) + +### 类成员属性 +#### 预处理参数 +用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 + + +- [模型介绍](../../) +- [Python部署](../python) +- [视觉模型预测结果](../../../../../docs/api/vision_results/) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/ppmatting/cpp/infer.cc b/examples/vision/segmentation/ppmatting/cpp/infer.cc new file mode 100644 index 000000000..e7f1ffbcb --- /dev/null +++ b/examples/vision/segmentation/ppmatting/cpp/infer.cc @@ -0,0 +1,170 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +#ifdef WIN32 +const char sep = '\\'; +#else +const char sep = '/'; +#endif + +void CpuInfer(const std::string& model_dir, const std::string& image_file, + const std::string& background_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "deploy.yaml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto model = fastdeploy::vision::matting::PPMatting(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + cv::Mat bg = cv::imread(background_file); + fastdeploy::vision::MattingResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." 
<< std::endl; + return; + } + auto vis_im = fastdeploy::vision::VisMatting(im, res); + auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im, bg, res); + cv::imwrite("visualized_result.jpg", vis_im_with_bg); + cv::imwrite("visualized_result_fg.jpg", vis_im); + std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " + "and ./visualized_result_fg.jpg" + << std::endl; +} + +void KunlunXinInfer(const std::string& model_dir, const std::string& image_file, + const std::string& background_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "deploy.yaml"; + auto option = fastdeploy::RuntimeOption(); + option.UseKunlunXin(); + auto model = fastdeploy::vision::matting::PPMatting(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + cv::Mat bg = cv::imread(background_file); + fastdeploy::vision::MattingResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + auto vis_im = fastdeploy::vision::VisMatting(im, res); + auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im, bg, res); + cv::imwrite("visualized_result.jpg", vis_im_with_bg); + cv::imwrite("visualized_result_fg.jpg", vis_im); + std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " + "and ./visualized_result_fg.jpg" + << std::endl; +} + +void GpuInfer(const std::string& model_dir, const std::string& image_file, + const std::string& background_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "deploy.yaml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + option.UsePaddleInferBackend(); + auto model = fastdeploy::vision::matting::PPMatting(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + cv::Mat bg = cv::imread(background_file); + fastdeploy::vision::MattingResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + auto vis_im = fastdeploy::vision::VisMatting(im, res); + auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im, bg, res); + cv::imwrite("visualized_result.jpg", vis_im_with_bg); + cv::imwrite("visualized_result_fg.jpg", vis_im); + std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " + "and ./visualized_result_fg.jpg" + << std::endl; +} + +void TrtInfer(const std::string& model_dir, const std::string& image_file, + const std::string& background_file) { + auto model_file = model_dir + sep + "model.pdmodel"; + auto params_file = model_dir + sep + "model.pdiparams"; + auto config_file = model_dir + sep + "deploy.yaml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseGpu(); + option.UseTrtBackend(); + option.SetTrtInputShape("img", {1, 3, 512, 512}); + auto model = fastdeploy::vision::matting::PPMatting(model_file, params_file, + config_file, option); + if (!model.Initialized()) { + std::cerr << "Failed to initialize." << std::endl; + return; + } + + auto im = cv::imread(image_file); + cv::Mat bg = cv::imread(background_file); + fastdeploy::vision::MattingResult res; + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." 
<< std::endl; + return; + } + auto vis_im = fastdeploy::vision::VisMatting(im, res); + auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im, bg, res); + cv::imwrite("visualized_result.jpg", vis_im_with_bg); + cv::imwrite("visualized_result_fg.jpg", vis_im); + std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " + "and ./visualized_result_fg.jpg" + << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 5) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./PP-Matting-512 ./test.jpg ./test_bg.jpg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend, 3: run " + "with kunlunxin." + << std::endl; + return -1; + } + if (std::atoi(argv[4]) == 0) { + CpuInfer(argv[1], argv[2], argv[3]); + } else if (std::atoi(argv[4]) == 1) { + GpuInfer(argv[1], argv[2], argv[3]); + } else if (std::atoi(argv[4]) == 2) { + TrtInfer(argv[1], argv[2], argv[3]); + } else if (std::atoi(argv[4]) == 3) { + KunlunXinInfer(argv[1], argv[2], argv[3]); + } + return 0; +} diff --git a/examples/vision/segmentation/ppmatting/python/README.md b/examples/vision/segmentation/ppmatting/python/README.md new file mode 100755 index 000000000..fb7d42617 --- /dev/null +++ b/examples/vision/segmentation/ppmatting/python/README.md @@ -0,0 +1,81 @@ +English | [简体中文](README_CN.md) +# PP-Matting Python Deployment Example + +Before deployment, two steps require confirmation + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) +- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) + +This directory provides examples that `infer.py` fast finishes the deployment of PP-Matting on CPU/GPU and GPU accelerated by TensorRT. The script is as follows +```bash +# Download the deployment example code +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/matting/ppmatting/python + +# Download PP-Matting model files and test images +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz +tar -xvf PP-Matting-512.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg +# CPU inference +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device cpu +# GPU inference +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu +# TensorRT inference on GPU(Attention: It is somewhat time-consuming for the operation of model serialization when running TensorRT inference for the first time. Please be patient.) +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu --use_trt True +# kunlunxin XPU inference +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device kunlunxin +``` + +The visualized result after running is as follows +
+(visualized matting results)
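+
+If you need the raw alpha matte rather than the saved visualization images, the following is a minimal sketch of consuming the returned `MattingResult`; it assumes the Python result object exposes the same `alpha` and `shape` members as the C++ structure described in the vision results documentation.
+
+```python
+import cv2
+import numpy as np
+import fastdeploy as fd
+
+model = fd.vision.matting.PPMatting(
+    "PP-Matting-512/model.pdmodel", "PP-Matting-512/model.pdiparams",
+    "PP-Matting-512/deploy.yaml")
+im = cv2.imread("matting_input.jpg")
+result = model.predict(im)
+# Assumption: `alpha` is a flattened float list and `shape` is [H, W]
+alpha = np.array(result.alpha, dtype=np.float32).reshape(result.shape)
+cv2.imwrite("alpha_matte.png", (alpha * 255).astype(np.uint8))
+```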
+## PP-Matting Python Interface + +```python +fd.vision.matting.PPMatting(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +``` + +PP-Matting model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) for more information + +**Parameter** + +> * **model_file**(str): Model file path +> * **params_file**(str): Parameter file path +> * **config_file**(str): Inference deployment configuration file +> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration +> * **model_format**(ModelFormat): Model format. Paddle format by default + +### predict function + +> ```python +> PPMatting.predict(input_image) +> ``` +> +> Model prediction interface. Input images and output detection results. +> +> **Parameter** +> +> > * **input_image**(np.ndarray): Input data in HWC or BGR format + +> **Return** +> +> > Return `fastdeploy.vision.MattingResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of the structure. + +### Class Member Variable + +#### Pre-processing Parameter +Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results + + + +## Other Documents + +- [PP-Matting Model Description](..) +- [PP-Matting C++ Deployment](../cpp) +- [Model Prediction Results](../../../../../docs/api/vision_results/) +- [How to switch the model inference backend engine](../../../../../docs/en/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/ppmatting/python/README_CN.md b/examples/vision/segmentation/ppmatting/python/README_CN.md new file mode 100644 index 000000000..5e676a1cf --- /dev/null +++ b/examples/vision/segmentation/ppmatting/python/README_CN.md @@ -0,0 +1,80 @@ +[English](README.md) | 简体中文 +# PP-Matting Python部署示例 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +本目录下提供`infer.py`快速完成PP-Matting在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成 + +```bash +#下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/matting/ppmatting/python + +# 下载PP-Matting模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz +tar -xvf PP-Matting-512.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg +# CPU推理 +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device cpu +# GPU推理 +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu +# GPU上使用TensorRT推理 (注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待) +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu --use_trt True +# 昆仑芯XPU推理 +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device kunlunxin +``` + +运行完成可视化结果如下图所示 +
+(抠图可视化结果)
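+
+针对上文提到的TensorRT首次运行需要序列化模型、耗时较长的问题,下面给出一个极简示意,通过缓存序列化后的引擎文件避免每次等待(其中`set_trt_cache_file`接口与缓存文件名为示例性假设,并非本目录脚本的固定用法):
+
+```python
+import fastdeploy as fd
+
+option = fd.RuntimeOption()
+option.use_gpu()
+option.use_trt_backend()
+option.set_trt_input_shape("img", [1, 3, 512, 512])
+# 示例性假设:首次运行将序列化后的TensorRT引擎缓存到该文件,后续运行直接加载
+option.set_trt_cache_file("pp_matting_512.trt")
+```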
+## PP-Matting Python接口 + +```python +fd.vision.matting.PPMatting(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +``` + +PP-Matting模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **config_file**(str): 推理部署配置文件 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + +### predict函数 + +> ```python +> PPMatting.predict(input_image) +> ``` +> +> 模型预测结口,输入图像直接输出检测结果。 +> +> **参数** +> +> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 + +> **返回** +> +> > 返回`fastdeploy.vision.MattingResult`结构体,结构体说明参考文档[视觉模型预测结果](../../../../../docs/api/vision_results/) + +### 类成员属性 +#### 预处理参数 +用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 + + +## 其它文档 + +- [PP-Matting 模型介绍](..) +- [PP-Matting C++部署](../cpp) +- [模型预测结果说明](../../../../../docs/api/vision_results/) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/ppmatting/python/infer.py b/examples/vision/segmentation/ppmatting/python/infer.py new file mode 100755 index 000000000..512d0ca86 --- /dev/null +++ b/examples/vision/segmentation/ppmatting/python/infer.py @@ -0,0 +1,70 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", required=True, help="Path of PaddleSeg model.") + parser.add_argument( + "--image", type=str, required=True, help="Path of test image file.") + parser.add_argument( + "--bg", + type=str, + required=True, + default=None, + help="Path of test background image file.") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.") + parser.add_argument( + "--use_trt", + type=ast.literal_eval, + default=False, + help="Wether to use tensorrt.") + return parser.parse_args() + + +def build_option(args): + option = fd.RuntimeOption() + if args.device.lower() == "gpu": + option.use_gpu() + option.use_paddle_infer_backend() + + if args.use_trt: + option.use_trt_backend() + option.set_trt_input_shape("img", [1, 3, 512, 512]) + + if args.device.lower() == "kunlunxin": + option.use_kunlunxin() + return option + + +args = parse_arguments() + +# 配置runtime,加载模型 +runtime_option = build_option(args) +model_file = os.path.join(args.model, "model.pdmodel") +params_file = os.path.join(args.model, "model.pdiparams") +config_file = os.path.join(args.model, "deploy.yaml") +model = fd.vision.matting.PPMatting( + model_file, params_file, config_file, runtime_option=runtime_option) + +# 预测图片抠图结果 +im = cv2.imread(args.image) +bg = cv2.imread(args.bg) +result = model.predict(im) +print(result) +# 可视化结果 +vis_im = fd.vision.vis_matting(im, result) +vis_im_with_bg = fd.vision.swap_background(im, bg, result) +cv2.imwrite("visualized_result_fg.jpg", vis_im) +cv2.imwrite("visualized_result_replaced_bg.jpg", vis_im_with_bg) +print( + "Visualized result save in ./visualized_result_replaced_bg.jpg and ./visualized_result_fg.jpg" +) From ad63c2707ae9255b8fe8a897ef2fbfce8ec7e852 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com> Date: Wed, 8 Feb 2023 19:45:24 +0800 Subject: [PATCH 11/41] [Doc]Update RKNPU2 docs (#1266) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit 更新docs --- docs/cn/build_and_install/rknpu2.md | 2 +- docs/cn/faq/rknpu2/rknpu2.md | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/cn/build_and_install/rknpu2.md b/docs/cn/build_and_install/rknpu2.md index 33ee596fe..f432c94ce 100644 --- a/docs/cn/build_and_install/rknpu2.md +++ b/docs/cn/build_and_install/rknpu2.md @@ -11,5 +11,5 @@ RKNPU2指的是Rockchip推出的RK356X以及RK3588系列芯片的NPU。 * [RKNPU2开发环境搭建](../faq/rknpu2/environment.md) * [编译FastDeploy](../faq/rknpu2/build.md) * [RKNN模型导出建议](../faq/rknpu2/export.md) -* [RKNPU2模型部署demo](../faq/rknpu2/rknpu2.md) +* [RKNPU2模型速度一览表](../faq/rknpu2/rknpu2.md) * [RKNPU2 常见问题合集](../faq/rknpu2/issues.md) diff --git a/docs/cn/faq/rknpu2/rknpu2.md b/docs/cn/faq/rknpu2/rknpu2.md index 81f35bd43..99554e5ba 100644 --- a/docs/cn/faq/rknpu2/rknpu2.md +++ b/docs/cn/faq/rknpu2/rknpu2.md @@ -13,18 +13,20 @@ ONNX模型不能直接调用RK芯片中的NPU进行运算,需要把ONNX模型 * ARM CPU使用ONNX框架进行测试 * NPU均使用单核进行测试 -| 任务场景 | 模型 | 模型版本(表示已经测试的版本) | ARM CPU/RKNN速度(ms) | -|----------------------|------------------------------------------------------------------------------------------|--------------------------|--------------------| -| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 162/112 | -| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | -/57 | -| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | -| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | -| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | -/- | -| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 133/43 | -| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 | -| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 | -| Face FaceRecognition | [InsightFace](../../../../examples/vision/faceid/insightface/rknpu2/README_CN.md) | ms1mv3_arcface_r18(int8) | 81/12 | -| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 | +| 任务场景 | 模型 | 模型版本(表示已经测试的版本) | ARM CPU/RKNN速度(ms) | +|----------------------|--------------------------------------------------------------------------------------------------|--------------------------|--------------------| +| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 162/112 | +| Detection | [PaddleDetection Yolov8](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | yolov8-n | -/100 | +| Detection | [PPYOLOE](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | ppyoloe-s(int8) | -/77 | +| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | -/57 | +| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | +| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | +| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | -/- | +| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | 
portrait(int8) | 133/43 | +| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 | +| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 | +| Face FaceRecognition | [InsightFace](../../../../examples/vision/faceid/insightface/rknpu2/README_CN.md) | ms1mv3_arcface_r18(int8) | 81/12 | +| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 | ## 预编译库下载 From 0414f0fd734abba5a91b62dda081d54b124b4172 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 9 Feb 2023 02:43:38 +0000 Subject: [PATCH 12/41] Update doc --- docs/api/vision_results/README_CN.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/api/vision_results/README_CN.md b/docs/api/vision_results/README_CN.md index a6ad2a808..94efce21e 100755 --- a/docs/api/vision_results/README_CN.md +++ b/docs/api/vision_results/README_CN.md @@ -16,6 +16,3 @@ FastDeploy根据视觉模型的任务类型,定义了不同的结构体(`fastd | OCRResult | [C++/Python文档](./ocr_result.md) | 文本框检测,分类和文本识别返回结果 | OCR系列模型等 | | MOTResult | [C++/Python文档](./mot_result.md) | 多目标跟踪返回结果 | pptracking系列模型等 | | HeadPoseResult | [C++/Python文档](./headpose_result.md) | 头部姿态估计返回结果 | FSANet系列模型等 | - -## 常见问题 -- [如何将视觉模型预测结果转换为numpy格式](./faq_CN.md) From 9574d301c8509d26b697147b415bd0e31779d1fb Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 9 Feb 2023 03:27:48 +0000 Subject: [PATCH 13/41] Add ppmatting soft link --- examples/vision/matting/ppmatting | 1 - examples/vision/matting/ppmatting/README.md | 1 + examples/vision/matting/ppmatting/README_CN.md | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) delete mode 120000 examples/vision/matting/ppmatting create mode 120000 examples/vision/matting/ppmatting/README.md create mode 120000 examples/vision/matting/ppmatting/README_CN.md diff --git a/examples/vision/matting/ppmatting b/examples/vision/matting/ppmatting deleted file mode 120000 index 352f37bf2..000000000 --- a/examples/vision/matting/ppmatting +++ /dev/null @@ -1 +0,0 @@ -../segmentation/ppmatting/ \ No newline at end of file diff --git a/examples/vision/matting/ppmatting/README.md b/examples/vision/matting/ppmatting/README.md new file mode 120000 index 000000000..a6452162a --- /dev/null +++ b/examples/vision/matting/ppmatting/README.md @@ -0,0 +1 @@ +../../segmentation/ppmatting/README.md \ No newline at end of file diff --git a/examples/vision/matting/ppmatting/README_CN.md b/examples/vision/matting/ppmatting/README_CN.md new file mode 120000 index 000000000..00a0e19ce --- /dev/null +++ b/examples/vision/matting/ppmatting/README_CN.md @@ -0,0 +1 @@ +../../segmentation/ppmatting/README_CN.md \ No newline at end of file From b1dc8c09899e3f3672dff089dbb9a9460056dcac Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 9 Feb 2023 03:56:36 +0000 Subject: [PATCH 14/41] Create README.md in ppmatting directory --- examples/vision/matting/ppmatting/README.md | 4 +++- examples/vision/matting/ppmatting/README_CN.md | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) mode change 120000 => 100644 examples/vision/matting/ppmatting/README.md delete mode 120000 examples/vision/matting/ppmatting/README_CN.md diff --git a/examples/vision/matting/ppmatting/README.md b/examples/vision/matting/ppmatting/README.md deleted file mode 120000 index a6452162a..000000000 --- a/examples/vision/matting/ppmatting/README.md +++ /dev/null @@ -1 +0,0 @@ 
-../../segmentation/ppmatting/README.md \ No newline at end of file diff --git a/examples/vision/matting/ppmatting/README.md b/examples/vision/matting/ppmatting/README.md new file mode 100644 index 000000000..2e8389bc1 --- /dev/null +++ b/examples/vision/matting/ppmatting/README.md @@ -0,0 +1,3 @@ +PP-Matting deployment examples, please refer to [document](../../segmentation/ppmatting/README_CN.md). + +PP-Matting的部署示例,请参考[文档](../../segmentation/ppmatting/README_CN.md). diff --git a/examples/vision/matting/ppmatting/README_CN.md b/examples/vision/matting/ppmatting/README_CN.md deleted file mode 120000 index 00a0e19ce..000000000 --- a/examples/vision/matting/ppmatting/README_CN.md +++ /dev/null @@ -1 +0,0 @@ -../../segmentation/ppmatting/README_CN.md \ No newline at end of file From b405cc3a2c5b54d07771894ede57ef29678627e5 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 9 Feb 2023 07:38:11 +0000 Subject: [PATCH 15/41] Update ppmatting directory --- examples/vision/matting/ppmatting/README.md | 4 +- .../paddleseg/amlogic/a311d/README.md | 12 ++- .../segmentation/paddleseg/ascend/README.md | 11 ++- .../segmentation/paddleseg/cpu-gpu/README.md | 7 +- .../segmentation/paddleseg/kunlun/README.md | 9 +- .../paddleseg/rockchip/rknpu2/README.md | 6 +- .../rockchip/rknpu2/python/README.md | 2 +- .../paddleseg/rockchip/rv1126/README.md | 14 ++- .../paddleseg/serving/README_CN.md | 45 ++++++++- .../serving/fastdeploy_serving/README.md | 2 + .../serving/fastdeploy_serving/README_CN.md | 4 +- .../serving/simple_serving/README.md | 2 +- .../serving/simple_serving/README_CN.md | 5 +- .../segmentation/paddleseg/sophgo/README.md | 9 +- .../vision/segmentation/ppmatting/README.md | 46 +++------ .../segmentation/ppmatting/ascend/README.md | 1 + .../segmentation/ppmatting/cpp/README.md | 93 ------------------ .../segmentation/ppmatting/cpp/README_CN.md | 94 ------------------- .../{README_CN.md => cpu-gpu/README.md} | 28 +++--- .../{ => cpu-gpu}/cpp/CMakeLists.txt | 0 .../ppmatting/cpu-gpu/cpp/README.md | 60 ++++++++++++ .../ppmatting/{ => cpu-gpu}/cpp/infer.cc | 4 + .../ppmatting/cpu-gpu/python/README.md | 52 ++++++++++ .../ppmatting/{ => cpu-gpu}/python/infer.py | 4 + .../segmentation/ppmatting/kunlun/README.md | 1 + .../segmentation/ppmatting/python/README.md | 81 ---------------- .../ppmatting/python/README_CN.md | 80 ---------------- 27 files changed, 250 insertions(+), 426 deletions(-) create mode 120000 examples/vision/segmentation/ppmatting/ascend/README.md delete mode 100755 examples/vision/segmentation/ppmatting/cpp/README.md delete mode 100644 examples/vision/segmentation/ppmatting/cpp/README_CN.md rename examples/vision/segmentation/ppmatting/{README_CN.md => cpu-gpu/README.md} (56%) rename examples/vision/segmentation/ppmatting/{ => cpu-gpu}/cpp/CMakeLists.txt (100%) create mode 100644 examples/vision/segmentation/ppmatting/cpu-gpu/cpp/README.md rename examples/vision/segmentation/ppmatting/{ => cpu-gpu}/cpp/infer.cc (97%) create mode 100644 examples/vision/segmentation/ppmatting/cpu-gpu/python/README.md rename examples/vision/segmentation/ppmatting/{ => cpu-gpu}/python/infer.py (91%) create mode 120000 examples/vision/segmentation/ppmatting/kunlun/README.md delete mode 100755 examples/vision/segmentation/ppmatting/python/README.md delete mode 100644 examples/vision/segmentation/ppmatting/python/README_CN.md diff --git a/examples/vision/matting/ppmatting/README.md b/examples/vision/matting/ppmatting/README.md index 2e8389bc1..2a54d53c7 100644 --- 
a/examples/vision/matting/ppmatting/README.md +++ b/examples/vision/matting/ppmatting/README.md @@ -1,3 +1,3 @@ -PP-Matting deployment examples, please refer to [document](../../segmentation/ppmatting/README_CN.md). +PaddleSeg Matting deployment examples, please refer to [document](../../segmentation/ppmatting/README_CN.md). -PP-Matting的部署示例,请参考[文档](../../segmentation/ppmatting/README_CN.md). +PaddleSeg Matting的部署示例,请参考[文档](../../segmentation/ppmatting/README_CN.md). diff --git a/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md index 9f856deb4..c9a04fd41 100644 --- a/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md +++ b/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md @@ -1,13 +1,17 @@ [English](README.md) | 简体中文 -# 在晶晨A311D上使用FastDeploy部署PaddleSeg模型 -晶晨A311D是一款先进的AI应用处理器。FastDeploy支持在A311D上基于Paddle-Lite部署PaddleSeg相关模型 +# PaddleSeg在晶晨A311D上通过FastDeploy部署模型 +晶晨A311D是一款先进的AI应用处理器。PaddleSeg支持通过FastDeploy在A311D上基于Paddle-Lite部署相关Segmentation模型 ## 晶晨A311D支持的PaddleSeg模型 -目前所支持的PaddleSeg模型如下: + +- [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg) +>> **注意**:支持PaddleSeg高于2.6版本的Segmentation模型 + +目前晶晨A311D所支持的PaddleSeg模型如下: - [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) -## 预导出的推理模型 +## 预导出的量化推理模型 为了方便开发者的测试,下面提供了PaddleSeg导出的部分量化后的推理模型,开发者可直接下载使用。 | 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | diff --git a/examples/vision/segmentation/paddleseg/ascend/README.md b/examples/vision/segmentation/paddleseg/ascend/README.md index 475d8817b..05f4d8348 100644 --- a/examples/vision/segmentation/paddleseg/ascend/README.md +++ b/examples/vision/segmentation/paddleseg/ascend/README.md @@ -1,10 +1,13 @@ -# 使用FastDeploy部署PaddleSeg模型 +[English](README.md) | 简体中文 -FastDeploy支持在华为昇腾上部署PaddleSeg模型 +# PaddleSeg利用FastDeploy在华为昇腾上部署模型 -## 模型版本说明 +PaddleSeg支持通过FastDeploy在华为昇腾上部署Segmentation相关模型 -- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) +## 支持的PaddleSeg模型 + +- [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg) +>> **注意**:支持PaddleSeg高于2.6版本的Segmentation模型 目前FastDeploy支持如下模型的部署 diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/README.md b/examples/vision/segmentation/paddleseg/cpu-gpu/README.md index a5e02e6c9..b126e9ddb 100644 --- a/examples/vision/segmentation/paddleseg/cpu-gpu/README.md +++ b/examples/vision/segmentation/paddleseg/cpu-gpu/README.md @@ -1,10 +1,13 @@ +[English](README.md) | 简体中文 + # PaddleSeg模型高性能全场景部署方案-FastDeploy -PaddleSeg通过FastDeploy支持在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)硬件上部署 +PaddleSeg支持利用FastDeploy在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)硬件上部署Segmentation模型 ## 模型版本说明 -- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) +- [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg) +>> **注意**:支持PaddleSeg高于2.6版本的Segmentation模型 目前FastDeploy支持如下模型的部署 diff --git a/examples/vision/segmentation/paddleseg/kunlun/README.md b/examples/vision/segmentation/paddleseg/kunlun/README.md index 08406d082..cdb727988 100644 --- a/examples/vision/segmentation/paddleseg/kunlun/README.md +++ b/examples/vision/segmentation/paddleseg/kunlun/README.md @@ -1,8 +1,13 @@ -# 使用FastDeploy部署PaddleSeg模型 +[English](README.md) | 简体中文 + +# PaddleSeg模型高性能全场景部署方案-FastDeploy + +PaddleSeg支持利用FastDeploy在昆仑芯片上部署Segmentation模型 ## 模型版本说明 -- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) +- 
[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg) +>> **注意**:支持PaddleSeg高于2.6版本的Segmentation模型 目前FastDeploy支持如下模型的部署 diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md index 21a9b92ba..a536630e3 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/README.md @@ -1,6 +1,7 @@ [English](README.md) | 简体中文 -# 基于RKNPU2使用FastDeploy部署PaddleSeg模型 +# PaddleSeg利用FastDeploy基于RKNPU2部署Segmentation模型 + RKNPU2 提供了一个高性能接口来访问 Rockchip NPU,支持如下硬件的部署 - RK3566/RK3568 - RK3588/RK3588S @@ -10,7 +11,8 @@ RKNPU2 提供了一个高性能接口来访问 Rockchip NPU,支持如下硬件 ## 模型版本说明 -- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) +- [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg) +>> **注意**:支持PaddleSeg高于2.6版本的Segmentation模型 目前FastDeploy使用RKNPU2推理PaddleSeg支持如下模型的部署: - [U-Net系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/unet/README.md) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README.md index 5b7c3df35..7524b6c60 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/python/README.md @@ -3,7 +3,7 @@ 在部署前,需确认以下步骤 -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md) +- 1. 软硬件环境满足要求,RKNPU2环境部署等参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md) 【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../../../../matting/) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md index 5f92e7f6f..12b9a0d05 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md @@ -1,10 +1,16 @@ [English](README.md) | 简体中文 -# 在瑞芯微 RV1126 上使用 FastDeploy 部署 PaddleSeg 模型 -瑞芯微 RV1126 是一款编解码芯片,专门面相人工智能的机器视觉领域。目前,FastDeploy 支持在 RV1126 上基于 Paddle-Lite 部署 PaddleSeg 相关模型 +# PaddleSeg在瑞芯微 RV1126上通过FastDeploy部署模型 +瑞芯微 RV1126 是一款编解码芯片,专门面相人工智能的机器视觉领域。PaddleSeg支持通过FastDeploy在RV1126上基于Paddle-Lite部署相关Segmentation模型 + +## 瑞芯微 RV1126支持的PaddleSeg模型 + +- [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg) +>> **注意**:支持PaddleSeg高于2.6版本的Segmentation模型 -## 瑞芯微 RV1126 支持的PaddleSeg模型 目前瑞芯微 RV1126 的 NPU 支持的量化模型如下: -## 预导出的推理模型 +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) + +## 预导出的量化推理模型 为了方便开发者的测试,下面提供了PaddleSeg导出的部分量化后的推理模型,开发者可直接下载使用。 | 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | diff --git a/examples/vision/segmentation/paddleseg/serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/README_CN.md index ea1599432..803465941 100644 --- a/examples/vision/segmentation/paddleseg/serving/README_CN.md +++ b/examples/vision/segmentation/paddleseg/serving/README_CN.md @@ -1,10 +1,53 @@ [English](README.md) | 简体中文 -# 使用 FastDeploy 服务化部署 PaddleSeg 模型 +# PaddleSeg 使用 FastDeploy 服务化部署 Segmentation 模型 ## FastDeploy 服务化部署介绍 在线推理作为企业或个人线上部署模型的最后一环,是工业界必不可少的环节,其中最重要的就是服务化推理框架。FastDeploy 目前提供两种服务化部署方式:simple_serving和fastdeploy_serving - simple_serving基于Flask框架具有简单高效的特点,可以快速验证线上部署模型的可行性。 - fastdeploy_serving基于Triton Inference Server框架,是一套完备且性能卓越的服务化部署框架,可用于实际生产。 
+## 模型版本说明 + +- [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg) +>> **注意**:支持PaddleSeg高于2.6版本的Segmentation模型 + +目前FastDeploy支持如下模型的部署 + +- [U-Net系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/unet/README.md) +- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) +- [PP-HumanSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/contrib/PP-HumanSeg/README.md) +- [FCN系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/fcn/README.md) +- [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/deeplabv3/README.md) +- [SegFormer系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/segformer/README.md) + +>>**注意** 如部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**请参考[Matting模型部署](../../ppmatting) + +## 准备PaddleSeg部署模型 +PaddleSeg模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md) + +**注意** +- PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 + +## 预导出的推理模型 + +为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型 +- without-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op none` +- with-argmax导出方式为:**不指定**`--input_shape`,**指定**`--output_op argmax` + +开发者可直接下载使用。 + +| 模型 | 参数文件大小 |输入Shape | mIoU | mIoU (flip) | mIoU (ms+flip) | +|:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- | +| [Unet-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_with_argmax_infer.tgz) \| [Unet-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | 52MB | 1024x512 | 65.00% | 66.02% | 66.89% | +| [PP-LiteSeg-B(STDC2)-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz) \| [PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz) | 31MB | 1024x512 | 79.04% | 79.52% | 79.85% | +|[PP-HumanSegV1-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV1_Lite_with_argmax_infer.tgz) \| [PP-HumanSegV1-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) | 543KB | 192x192 | 86.2% | - | - | +|[PP-HumanSegV2-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_infer.tgz) | 12MB | 192x192 | 92.52% | - | - | +| [PP-HumanSegV2-Mobile-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Mobile-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) | 29MB | 192x192 | 93.13% | - | - | +|[PP-HumanSegV1-Server-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_with_argmax_infer.tgz) \| [PP-HumanSegV1-Server-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) | 103MB | 512x512 | 96.47% | - | - | +| [Portait-PP-HumanSegV2-Lite-with-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer.tgz) \| 
[Portait-PP-HumanSegV2-Lite-without-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) | 3.6M | 256x144 | 96.63% | - | - | +| [FCN-HRNet-W18-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_with_argmax_infer.tgz) \| [FCN-HRNet-W18-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz)(暂时不支持ONNXRuntime的GPU推理) | 37MB | 1024x512 | 78.97% | 79.49% | 79.74% | +| [Deeplabv3-ResNet101-OS8-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer.tgz) \| [Deeplabv3-ResNet101-OS8-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) | 150MB | 1024x512 | 79.90% | 80.22% | 80.47% | +| [SegFormer_B0-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/SegFormer_B0-cityscapes-with-argmax.tgz) \| [SegFormer_B0-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/SegFormer_B0-cityscapes-without-argmax.tgz) | 15MB | 1024x1024 | 76.73% | 77.16% | - | + ## 详细部署文档 - [fastdeploy serving](fastdeploy_serving) diff --git a/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README.md b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README.md index a451e8730..c5b6dd41f 100644 --- a/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README.md +++ b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README.md @@ -1,6 +1,8 @@ English | [简体中文](README_CN.md) # PaddleSegmentation Serving Deployment Demo +Before serving deployment, it is necessary to confirm the hardware and software environment requirements of the service image and the image pull command, please refer to [FastDeploy service deployment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/README.md) + ## Launch Serving ```bash diff --git a/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md index ac8965d75..ae346cb5b 100644 --- a/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md +++ b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md @@ -1,9 +1,7 @@ [English](README.md) | 简体中文 # PaddleSeg 服务化部署示例 -在服务化部署前,需确认 - -- 1. 服务化镜像的软硬件环境要求和镜像拉取命令请参考[FastDeploy服务化部署](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/README_CN.md) +在服务化部署前,需确认服务化镜像的软硬件环境要求和镜像拉取命令,请参考[FastDeploy服务化部署](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/README_CN.md) ## 启动服务 diff --git a/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md b/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md index da41a3a00..686164ad7 100644 --- a/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md +++ b/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md @@ -5,7 +5,7 @@ English | [简体中文](README_CN.md) ## Environment -- 1. Prepare environment and install FastDeploy Python whl, refer to [download_prebuilt_libraries](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) +- 1. 
Prepare environment and install FastDeploy Python whl, refer to [download_prebuilt_libraries](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/build_and_install#install-prebuilt-fastdeploy) Server: ```bash diff --git a/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md index d12bb9f2e..db06103ed 100644 --- a/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md +++ b/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md @@ -2,10 +2,9 @@ # PaddleSeg Python轻量服务化部署示例 -在部署前,需确认以下两个步骤 +## 部署环境准备 -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/download_prebuilt_libraries.md) +在部署前,需确认软硬件环境,同时下载预编译python wheel 包,参考文档[FastDeploy预编译库安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装) 服务端: ```bash diff --git a/examples/vision/segmentation/paddleseg/sophgo/README.md b/examples/vision/segmentation/paddleseg/sophgo/README.md index 1c08a5b7a..366656a75 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/README.md +++ b/examples/vision/segmentation/paddleseg/sophgo/README.md @@ -1,8 +1,13 @@ [English](README.md) | 简体中文 -# PaddleSeg C++部署示例 +# PaddleSeg在算能(Sophgo)硬件上通过FastDeploy部署模型 +PaddleSeg支持通过FastDeploy在算能TPU上部署相关Segmentation模型 -## 支持模型列表 +## 算能硬件支持的PaddleSeg模型 +- [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg) +>> **注意**:支持PaddleSeg高于2.6版本的Segmentation模型 + +目前算能TPU支持的模型如下: - [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/configs/pp_liteseg/README.md) ## 预导出的推理模型 diff --git a/examples/vision/segmentation/ppmatting/README.md b/examples/vision/segmentation/ppmatting/README.md index a2cbdc346..b3dd9cc80 100644 --- a/examples/vision/segmentation/ppmatting/README.md +++ b/examples/vision/segmentation/ppmatting/README.md @@ -1,42 +1,22 @@ -English | [简体中文](README_CN.md) -# PP-Matting Model Deployment +# PaddleSeg高性能全场景模型部署方案—FastDeploy -## Model Description +## FastDeploy介绍 -- [PP-Matting Release/2.6](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) +[FastDeploy](https://github.com/PaddlePaddle/FastDeploy)是一款全场景、易用灵活、极致高效的AI推理部署工具,使用FastDeploy可以简单高效的在10+款硬件上对PaddleSeg Matting模型进行快速部署 -## List of Supported Models +## 支持如下的硬件部署 -Now FastDeploy supports the deployment of the following models +| 硬件支持列表 | | | | +|:----- | :-- | :-- | :-- | +| [NVIDIA GPU](cpu-gpu) | [X86 CPU](cpu-gpu)| [飞腾CPU](cpu-gpu) | [ARM CPU](cpu-gpu) | +| [Intel GPU(独立显卡/集成显卡)](cpu-gpu) | [昆仑](cpu-gpu) | [昇腾](cpu-gpu) | -- [PP-Matting models](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) -- [PP-HumanMatting models](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) -- [ModNet models](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) +## 常见问题 +遇到问题可查看常见问题集合文档或搜索FastDeploy issues,链接如下: -## Export Deployment Model +[常见问题集合](https://github.com/PaddlePaddle/FastDeploy/tree/develop/docs/cn/faq) -Before deployment, PP-Matting needs to be exported into the deployment model. Refer to [Export Model](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) for more information. 
(Tips: You need to set the `--input_shape` parameter of the export script when exporting PP-Matting and PP-HumanMatting models) +[FastDeploy issues](https://github.com/PaddlePaddle/FastDeploy/issues) - -## Download Pre-trained Models - -For developers' testing, models exported by PP-Matting are provided below. Developers can download and use them directly. - -The accuracy metric is sourced from the model description in PP-Matting. (Accuracy data are not provided) Refer to the introduction in PP-Matting for more details. - -| Model | Parameter Size | Accuracy | Note | -|:---------------------------------------------------------------- |:----- |:----- | :------ | -| [PP-Matting-512](https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz) | 106MB | - | -| [PP-Matting-1024](https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-1024.tgz) | 106MB | - | -| [PP-HumanMatting](https://bj.bcebos.com/paddlehub/fastdeploy/PPHumanMatting.tgz) | 247MB | - | -| [Modnet-ResNet50_vd](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_ResNet50_vd.tgz) | 355MB | - | -| [Modnet-MobileNetV2](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_MobileNetV2.tgz) | 28MB | - | -| [Modnet-HRNet_w18](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_HRNet_w18.tgz) | 51MB | - | - - - -## Detailed Deployment Tutorials - -- [Python Deployment](python) -- [C++ Deployment](cpp) +若以上方式都无法解决问题,欢迎给FastDeploy提交新的[issue](https://github.com/PaddlePaddle/FastDeploy/issues) diff --git a/examples/vision/segmentation/ppmatting/ascend/README.md b/examples/vision/segmentation/ppmatting/ascend/README.md new file mode 120000 index 000000000..3ed44e130 --- /dev/null +++ b/examples/vision/segmentation/ppmatting/ascend/README.md @@ -0,0 +1 @@ +../cpu-gpu/README.md \ No newline at end of file diff --git a/examples/vision/segmentation/ppmatting/cpp/README.md b/examples/vision/segmentation/ppmatting/cpp/README.md deleted file mode 100755 index f678fabd4..000000000 --- a/examples/vision/segmentation/ppmatting/cpp/README.md +++ /dev/null @@ -1,93 +0,0 @@ -English | [简体中文](README_CN.md) -# PP-Matting C++ Deployment Example - -This directory provides examples that `infer.cc` fast finishes the deployment of PP-Matting on CPU/GPU and GPU accelerated by TensorRT. -Before deployment, two steps require confirmation - -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) -- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) - -Taking the PP-Matting inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required to support this model. - -```bash -mkdir build -cd build -# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# Download PP-Matting model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz -tar -xvf PP-Matting-512.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg - - -# CPU inference -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 0 -# GPU inference -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 1 -# TensorRT inference on GPU -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 2 -# kunlunxin XPU inference -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 3 -``` - -The visualized result after running is as follows -
-(visualized matting results)
- -The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to: -- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/en/faq/use_sdk_on_windows.md) - -## PP-Matting C++ Interface - -### PPMatting Class - -```c++ -fastdeploy::vision::matting::PPMatting( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PP-Matting model loading and initialization, among which model_file is the exported Paddle model format. - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -#### Predict Function - -> ```c++ -> PPMatting::Predict(cv::Mat* im, MattingResult* result) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **im**: Input images in HWC or BGR format -> > * **result**: The segmentation result, including the predicted label of the segmentation and the corresponding probability of the label. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of SegmentationResult - -### Class Member Variable -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - - -- [Model Description](../../) -- [Python Deployment](../python) -- [Vision Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/en/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/ppmatting/cpp/README_CN.md b/examples/vision/segmentation/ppmatting/cpp/README_CN.md deleted file mode 100644 index 38e2e592a..000000000 --- a/examples/vision/segmentation/ppmatting/cpp/README_CN.md +++ /dev/null @@ -1,94 +0,0 @@ -[English](README.md) | 简体中文 -# PP-Matting C++部署示例 - -本目录下提供`infer.cc`快速完成PP-Matting在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。 - -在部署前,需确认以下两个步骤 - -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -以Linux上 PP-Matting 推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本0.7.0以上(x.x.x>=0.7.0) - -```bash -mkdir build -cd build -# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# 下载PP-Matting模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz -tar -xvf PP-Matting-512.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg - - -# CPU推理 -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 0 -# GPU推理 -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 1 -# GPU上TensorRT推理 -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 2 -# 昆仑芯XPU推理 -./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 3 -``` - -运行完成可视化结果如下图所示 -
-(抠图可视化结果)
- -以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: -- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) - -## PP-Matting C++接口 - -### PPMatting类 - -```c++ -fastdeploy::vision::matting::PPMatting( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PP-Matting模型加载和初始化,其中model_file为导出的Paddle模型格式。 - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **config_file**(str): 推理部署配置文件 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 - -#### Predict函数 - -> ```c++ -> PPMatting::Predict(cv::Mat* im, MattingResult* result) -> ``` -> -> 模型预测接口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **im**: 输入图像,注意需为HWC,BGR格式 -> > * **result**: 分割结果,包括分割预测的标签以及标签对应的概率值, MattingResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/) - -### 类成员属性 -#### 预处理参数 -用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 - - -- [模型介绍](../../) -- [Python部署](../python) -- [视觉模型预测结果](../../../../../docs/api/vision_results/) -- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/ppmatting/README_CN.md b/examples/vision/segmentation/ppmatting/cpu-gpu/README.md similarity index 56% rename from examples/vision/segmentation/ppmatting/README_CN.md rename to examples/vision/segmentation/ppmatting/cpu-gpu/README.md index a1c9801aa..e590ac42c 100644 --- a/examples/vision/segmentation/ppmatting/README_CN.md +++ b/examples/vision/segmentation/ppmatting/cpu-gpu/README.md @@ -1,30 +1,32 @@ -[English](README.md) | 简体中文 -# PP-Matting模型部署 +# PaddleSeg Matting模型高性能全场景部署方案-FastDeploy + +PaddleSeg通过[FastDeploy](https://github.com/PaddlePaddle/FastDeploy)支持在NVIDIA GPU、X86 CPU、飞腾CPU、ARM CPU、Intel GPU(独立显卡/集成显卡)、昆仑芯、华为昇腾硬件上部署Matting模型 ## 模型版本说明 -- [PP-Matting Release/2.6](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) - -## 支持模型列表 +- [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/develop) +>> **注意**:支持PaddleSeg高于2.6版本的Matting模型 目前FastDeploy支持如下模型的部署 -- [PP-Matting系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) -- [PP-HumanMatting系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) -- [ModNet系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) +- [PP-Matting系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/Matting) +- [PP-HumanMatting系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/Matting) +- [ModNet系列模型](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/Matting) -## 导出部署模型 +## 准备PaddleSeg部署模型 +在部署前,需要先将Matting模型导出成部署模型,导出步骤参考文档[导出模型](https://github.com/PaddlePaddle/PaddleSeg/tree/develop/Matting) -在部署前,需要先将PP-Matting导出成部署模型,导出步骤参考文档[导出模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting)(Tips:导出PP-Matting系列模型和PP-HumanMatting系列模型需要设置导出脚本的`--input_shape`参数) +**注意** +- PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息 - -## 下载预训练模型 +## 预导出的推理模型 为了方便开发者的测试,下面提供了PP-Matting导出的各系列模型,开发者可直接下载使用。 其中精度指标来源于PP-Matting中对各模型的介绍(未提供精度数据),详情各参考PP-Matting中的说明。 +>> **注意**`deploy.yaml`文件记录导出模型的`input_shape`以及预处理信息,若不满足要求,用户可重新导出相关模型 | 模型 | 参数大小 | 精度 | 备注 | |:---------------------------------------------------------------- |:----- |:----- | :------ | @@ -35,8 +37,6 @@ | 
[Modnet-MobileNetV2](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_MobileNetV2.tgz) | 28MB | - | | [Modnet-HRNet_w18](https://bj.bcebos.com/paddlehub/fastdeploy/PPModnet_HRNet_w18.tgz) | 51MB | - | - - ## 详细部署文档 - [Python部署](python) diff --git a/examples/vision/segmentation/ppmatting/cpp/CMakeLists.txt b/examples/vision/segmentation/ppmatting/cpu-gpu/cpp/CMakeLists.txt similarity index 100% rename from examples/vision/segmentation/ppmatting/cpp/CMakeLists.txt rename to examples/vision/segmentation/ppmatting/cpu-gpu/cpp/CMakeLists.txt diff --git a/examples/vision/segmentation/ppmatting/cpu-gpu/cpp/README.md b/examples/vision/segmentation/ppmatting/cpu-gpu/cpp/README.md new file mode 100644 index 000000000..b88b79942 --- /dev/null +++ b/examples/vision/segmentation/ppmatting/cpu-gpu/cpp/README.md @@ -0,0 +1,60 @@ +[English](README.md) | 简体中文 +# PP-Matting C++部署示例 + +本目录下提供`infer.cc`快速完成PP-Matting在CPU/GPU、昆仑芯、华为昇腾以及GPU上通过Paddle-TensorRT加速部署的示例。 + +在部署前,需确认软硬件环境,同时下载预编译部署库,参考文档[FastDeploy预编译库安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install) + +>> **注意** 只有CPU、GPU提供预编译库,华为昇腾以及昆仑芯需要参考以上文档自行编译部署环境 + +以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.0以上(x.x.x>=1.0.0) + +```bash +mkdir build +cd build +# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载PP-Matting模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz +tar -xvf PP-Matting-512.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg + + +# CPU推理 +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 0 +# GPU推理 +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 1 +# GPU上TensorRT推理 +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 2 +# 昆仑芯XPU推理 +./infer_demo PP-Matting-512 matting_input.jpg matting_bgr.jpg 3 +``` +>> ***注意** 以上示例未提供华为昇腾的示例,在编译好昇腾部署环境后,只需改造一行代码,将示例文件中KunlunXinInfer方法的`option.UseKunlunXin()`为`option.UseAscend()`就可以完成在华为昇腾上的推理部署 + +运行完成可视化结果如下图所示 +
+(抠图可视化结果)
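+
+针对上文提到的华为昇腾改造,下面给出一个最小化的C++代码草图(仅作示意,模型与图片路径沿用本文示例,其余流程与`infer.cc`一致):
+
+```c++
+#include <iostream>
+#include "fastdeploy/vision.h"
+
+int main() {
+  fastdeploy::RuntimeOption option;
+  option.UseAscend();  // 原示例KunlunXinInfer中此处为 option.UseKunlunXin()
+  auto model = fastdeploy::vision::matting::PPMatting(
+      "PP-Matting-512/model.pdmodel", "PP-Matting-512/model.pdiparams",
+      "PP-Matting-512/deploy.yaml", option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return -1;
+  }
+  auto im = cv::imread("matting_input.jpg");
+  fastdeploy::vision::MattingResult res;
+  if (!model.Predict(&im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return -1;
+  }
+  cv::imwrite("visualized_result_fg.jpg", fastdeploy::vision::VisMatting(im, res));
+  return 0;
+}
+```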
+ +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +## 快速链接 +- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](../../) +- [Python部署](../python) + +## 常见问题 +- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) +- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md) +- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md) +- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md) +- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md) diff --git a/examples/vision/segmentation/ppmatting/cpp/infer.cc b/examples/vision/segmentation/ppmatting/cpu-gpu/cpp/infer.cc similarity index 97% rename from examples/vision/segmentation/ppmatting/cpp/infer.cc rename to examples/vision/segmentation/ppmatting/cpu-gpu/cpp/infer.cc index e7f1ffbcb..3af778f88 100644 --- a/examples/vision/segmentation/ppmatting/cpp/infer.cc +++ b/examples/vision/segmentation/ppmatting/cpu-gpu/cpp/infer.cc @@ -121,6 +121,10 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file, auto option = fastdeploy::RuntimeOption(); option.UseGpu(); option.UseTrtBackend(); + // If use original Tensorrt, not Paddle-TensorRT, + // comment the following two lines + option.EnablePaddleToTrt(); + option.EnablePaddleTrtCollectShape(); option.SetTrtInputShape("img", {1, 3, 512, 512}); auto model = fastdeploy::vision::matting::PPMatting(model_file, params_file, config_file, option); diff --git a/examples/vision/segmentation/ppmatting/cpu-gpu/python/README.md b/examples/vision/segmentation/ppmatting/cpu-gpu/python/README.md new file mode 100644 index 000000000..2adb2458a --- /dev/null +++ b/examples/vision/segmentation/ppmatting/cpu-gpu/python/README.md @@ -0,0 +1,52 @@ +[English](README.md) | 简体中文 +# PP-Matting Python部署示例 + +本目录下提供`infer.py`快速完成PP-Matting在CPU/GPU、昆仑芯、华为昇腾,以及GPU上通过Paddle-TensorRT加速部署的示例。执行如下脚本即可完成 + +## 部署环境准备 + +在部署前,需确认软硬件环境,同时下载预编译python wheel 包,参考文档[FastDeploy预编译库安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install) +>> **注意** 只有CPU、GPU提供预编译库,华为昇腾以及昆仑芯需要参考以上文档自行编译部署环境 + + +```bash +#下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/matting/ppmatting/python + +# 下载PP-Matting模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz +tar -xvf PP-Matting-512.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg +wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg +# CPU推理 +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device cpu +# GPU推理 +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu +# GPU上使用TensorRT推理 (注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待) +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu --use_trt True +# 昆仑芯XPU推理 +python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device kunlunxin +``` +>> ***注意** 以上示例未提供华为昇腾的示例,在编译好昇腾部署环境后,只需改造一行代码,将示例文件中的`option.use_kunlunxin()`为`option.use_ascend()`就可以完成在华为昇腾上的推理部署 + +运行完成可视化结果如下图所示 +
+(抠图可视化结果)
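+
+针对上文提到的华为昇腾改造,下面给出一个最小化的Python示意(仅作示意,模型与图片路径沿用本文示例,其余流程与`infer.py`一致):
+
+```python
+import cv2
+import fastdeploy as fd
+
+option = fd.RuntimeOption()
+option.use_ascend()  # 原示例中昆仑芯对应的写法为 option.use_kunlunxin()
+model = fd.vision.matting.PPMatting(
+    "PP-Matting-512/model.pdmodel", "PP-Matting-512/model.pdiparams",
+    "PP-Matting-512/deploy.yaml", runtime_option=option)
+im = cv2.imread("matting_input.jpg")
+result = model.predict(im)
+cv2.imwrite("visualized_result_fg.jpg", fd.vision.vis_matting(im, result))
+```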
+ +## 快速链接 +- [PaddleSeg python API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/semantic_segmentation.html) +- [FastDeploy部署PaddleSeg模型概览](..) +- [PaddleSeg C++部署](../cpp) + +## 常见问题 +- [如何将模型预测结果SegmentationResult转为numpy格式](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/vision_result_related_problems.md) +- [如何切换模型推理后端引擎](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md) +- [Intel GPU(独立显卡/集成显卡)的使用](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md) +- [编译CPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md) +- [编译GPU部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md) +- [编译Jetson部署库](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md) diff --git a/examples/vision/segmentation/ppmatting/python/infer.py b/examples/vision/segmentation/ppmatting/cpu-gpu/python/infer.py similarity index 91% rename from examples/vision/segmentation/ppmatting/python/infer.py rename to examples/vision/segmentation/ppmatting/cpu-gpu/python/infer.py index 512d0ca86..4d7ebb978 100755 --- a/examples/vision/segmentation/ppmatting/python/infer.py +++ b/examples/vision/segmentation/ppmatting/cpu-gpu/python/infer.py @@ -38,6 +38,10 @@ def build_option(args): if args.use_trt: option.use_trt_backend() + # If use original Tensorrt, not Paddle-TensorRT, + # comment the following two lines + option.enable_paddle_to_trt() + option.enable_paddle_trt_collect_shape() option.set_trt_input_shape("img", [1, 3, 512, 512]) if args.device.lower() == "kunlunxin": diff --git a/examples/vision/segmentation/ppmatting/kunlun/README.md b/examples/vision/segmentation/ppmatting/kunlun/README.md new file mode 120000 index 000000000..3ed44e130 --- /dev/null +++ b/examples/vision/segmentation/ppmatting/kunlun/README.md @@ -0,0 +1 @@ +../cpu-gpu/README.md \ No newline at end of file diff --git a/examples/vision/segmentation/ppmatting/python/README.md b/examples/vision/segmentation/ppmatting/python/README.md deleted file mode 100755 index fb7d42617..000000000 --- a/examples/vision/segmentation/ppmatting/python/README.md +++ /dev/null @@ -1,81 +0,0 @@ -English | [简体中文](README_CN.md) -# PP-Matting Python Deployment Example - -Before deployment, two steps require confirmation - -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) -- 2. Install FastDeploy Python whl package. Refer to [FastDeploy Python Installation](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) - -This directory provides examples that `infer.py` fast finishes the deployment of PP-Matting on CPU/GPU and GPU accelerated by TensorRT. 
The script is as follows -```bash -# Download the deployment example code -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/matting/ppmatting/python - -# Download PP-Matting model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz -tar -xvf PP-Matting-512.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg -# CPU inference -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device cpu -# GPU inference -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu -# TensorRT inference on GPU(Attention: It is somewhat time-consuming for the operation of model serialization when running TensorRT inference for the first time. Please be patient.) -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu --use_trt True -# kunlunxin XPU inference -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device kunlunxin -``` - -The visualized result after running is as follows -
-(visualized matting results)
-## PP-Matting Python Interface - -```python -fd.vision.matting.PPMatting(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) -``` - -PP-Matting model loading and initialization, among which model_file, params_file, and config_file are the Paddle inference files exported from the training model. Refer to [Model Export](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) for more information - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -### predict function - -> ```python -> PPMatting.predict(input_image) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **input_image**(np.ndarray): Input data in HWC or BGR format - -> **Return** -> -> > Return `fastdeploy.vision.MattingResult` structure. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of the structure. - -### Class Member Variable - -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - - - -## Other Documents - -- [PP-Matting Model Description](..) -- [PP-Matting C++ Deployment](../cpp) -- [Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/en/faq/how_to_change_backend.md) diff --git a/examples/vision/segmentation/ppmatting/python/README_CN.md b/examples/vision/segmentation/ppmatting/python/README_CN.md deleted file mode 100644 index 5e676a1cf..000000000 --- a/examples/vision/segmentation/ppmatting/python/README_CN.md +++ /dev/null @@ -1,80 +0,0 @@ -[English](README.md) | 简体中文 -# PP-Matting Python部署示例 - -在部署前,需确认以下两个步骤 - -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -本目录下提供`infer.py`快速完成PP-Matting在CPU/GPU,以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成 - -```bash -#下载部署示例代码 -git clone https://github.com/PaddlePaddle/FastDeploy.git -cd FastDeploy/examples/vision/matting/ppmatting/python - -# 下载PP-Matting模型文件和测试图片 -wget https://bj.bcebos.com/paddlehub/fastdeploy/PP-Matting-512.tgz -tar -xvf PP-Matting-512.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_input.jpg -wget https://bj.bcebos.com/paddlehub/fastdeploy/matting_bgr.jpg -# CPU推理 -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device cpu -# GPU推理 -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu -# GPU上使用TensorRT推理 (注意:TensorRT推理第一次运行,有序列化模型的操作,有一定耗时,需要耐心等待) -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device gpu --use_trt True -# 昆仑芯XPU推理 -python infer.py --model PP-Matting-512 --image matting_input.jpg --bg matting_bgr.jpg --device kunlunxin -``` - -运行完成可视化结果如下图所示 -
-
-## PP-Matting Python接口 - -```python -fd.vision.matting.PPMatting(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) -``` - -PP-Matting模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/Matting) - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **params_file**(str): 参数文件路径 -> * **config_file**(str): 推理部署配置文件 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 -> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 - -### predict函数 - -> ```python -> PPMatting.predict(input_image) -> ``` -> -> 模型预测结口,输入图像直接输出检测结果。 -> -> **参数** -> -> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 - -> **返回** -> -> > 返回`fastdeploy.vision.MattingResult`结构体,结构体说明参考文档[视觉模型预测结果](../../../../../docs/api/vision_results/) - -### 类成员属性 -#### 预处理参数 -用户可按照自己的实际需求,修改下列预处理参数,从而影响最终的推理和部署效果 - - -## 其它文档 - -- [PP-Matting 模型介绍](..) -- [PP-Matting C++部署](../cpp) -- [模型预测结果说明](../../../../../docs/api/vision_results/) -- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) From ba5a1b79e1162e473e44dbc12a221cda13a0881a Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Mon, 13 Feb 2023 07:20:07 +0000 Subject: [PATCH 16/41] Update by comments --- docs/api_docs/cpp/vision_results_cn.md | 34 +++++++++++++---- docs/api_docs/cpp/vision_results_en.md | 38 +++++++++++++------ docs/cn/build_and_install/huawei_ascend.md | 2 +- docs/cn/faq/vision_result_related_problems.md | 2 +- .../segmentation/paddleseg/android/README.md | 4 +- .../paddleseg/android/README_CN.md | 4 +- 6 files changed, 58 insertions(+), 26 deletions(-) diff --git a/docs/api_docs/cpp/vision_results_cn.md b/docs/api_docs/cpp/vision_results_cn.md index 73af7f9ed..669343eb4 100644 --- a/docs/api_docs/cpp/vision_results_cn.md +++ b/docs/api_docs/cpp/vision_results_cn.md @@ -1,3 +1,5 @@ +[English](./vision_results_en.md) | 简体中文 + # 视觉模型预测结果说明 ## ClassifyResult 图像分类结果 @@ -6,7 +8,9 @@ ClassifyResult代码定义在`fastdeploy/vision/common/result.h`中,用于表 ### C++ 定义 -`fastdeploy::vision::ClassifyResult` +```c++ +fastdeploy::vision::ClassifyResult +``` ```c++ struct ClassifyResult { @@ -28,7 +32,9 @@ SegmentationResult代码定义在`fastdeploy/vision/common/result.h`中,用于 ### C++ 定义 -`fastdeploy::vision::SegmentationResult` +```c++ +fastdeploy::vision::SegmentationResult +``` ```c++ struct SegmentationResult { @@ -102,7 +108,9 @@ FaceAlignmentResult 代码定义在`fastdeploy/vision/common/result.h`中,用 ### C++ 定义 -`fastdeploy::vision::FaceAlignmentResult` +```c++ +fastdeploy::vision::FaceAlignmentResult +``` ```c++ struct FaceAlignmentResult { @@ -122,7 +130,9 @@ KeyPointDetectionResult 代码定义在`fastdeploy/vision/common/result.h`中, ### C++ 定义 -`fastdeploy::vision::KeyPointDetectionResult` +```c++ +fastdeploy::vision::KeyPointDetectionResult +``` ```c++ struct KeyPointDetectionResult { @@ -152,7 +162,9 @@ struct KeyPointDetectionResult { FaceRecognitionResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明人脸识别模型对图像特征的embedding。 ### C++ 定义 -`fastdeploy::vision::FaceRecognitionResult` +```c++ +fastdeploy::vision::FaceRecognitionResult +``` ```c++ struct FaceRecognitionResult { @@ -174,7 +186,9 @@ MattingResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表 ### C++ 定义 -`fastdeploy::vision::MattingResult` +```c++ +fastdeploy::vision::MattingResult +``` ```c++ struct MattingResult { @@ -232,7 +246,9 @@ FaceDetectionResult 代码定义在`fastdeploy/vision/common/result.h`中,用 ### C++ 定义 -`fastdeploy::vision::FaceDetectionResult` +```c++ 
+fastdeploy::vision::FaceDetectionResult +``` ```c++ struct FaceDetectionResult { @@ -258,7 +274,9 @@ HeadPoseResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表 ### C++ 定义 -`fastdeploy::vision::HeadPoseResult` +```c++ +fastdeploy::vision::HeadPoseResult +``` ```c++ struct HeadPoseResult { diff --git a/docs/api_docs/cpp/vision_results_en.md b/docs/api_docs/cpp/vision_results_en.md index 122e90293..356f66b8e 100644 --- a/docs/api_docs/cpp/vision_results_en.md +++ b/docs/api_docs/cpp/vision_results_en.md @@ -1,6 +1,6 @@ -# Description of Vision Results +English | [简体中文](./vision_results_cn.md) -本文档的中文版本参考[视觉模型预测结果说明](./vision_results_cn.md) +# Description of Vision Results ## Image Classification Result @@ -8,7 +8,9 @@ The ClassifyResult code is defined in `fastdeploy/vision/common/result.h`, and i ### C++ Definition -`fastdeploy::vision::ClassifyResult` +```c++ +fastdeploy::vision::ClassifyResult +``` ```c++ struct ClassifyResult { @@ -31,7 +33,9 @@ The SegmentationResult code is defined in `fastdeploy/vision/common/result.h`, i ### C++ Definition -``fastdeploy::vision::SegmentationResult`` +```c++ +fastdeploy::vision::SegmentationResult +``` ```c++ struct SegmentationResult { @@ -104,7 +108,9 @@ The FaceDetectionResult code is defined in `fastdeploy/vision/common/result.h`, ### C++ Definition -``fastdeploy::vision::FaceDetectionResult`` +```c++ +fastdeploy::vision::FaceDetectionResult +``` ```c++ struct FaceDetectionResult { @@ -131,7 +137,9 @@ The KeyPointDetectionResult code is defined in `fastdeploy/vision/common/result. ### C++ Definition -``fastdeploy::vision::KeyPointDetectionResult`` +```c++ +fastdeploy::vision::KeyPointDetectionResult +``` ```c++ struct KeyPointDetectionResult { @@ -161,7 +169,9 @@ struct KeyPointDetectionResult { The FaceRecognitionResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate the image features embedding in the face recognition model. ### C++ Definition -`fastdeploy::vision::FaceRecognitionResult` +```c++ +fastdeploy::vision::FaceRecognitionResult +``` ```c++ struct FaceRecognitionResult { @@ -180,8 +190,9 @@ struct FaceRecognitionResult { The MattingResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate the predicted value of alpha transparency predicted and the predicted foreground, etc. ### C++ Definition - -``fastdeploy::vision::MattingResult`'' +```c++ +fastdeploy::vision::MattingResult +``` ```c++ struct MattingResult { @@ -239,8 +250,9 @@ struct OCRResult { The FaceAlignmentResult code is defined in `fastdeploy/vision/common/result.h`, and is used to indicate face landmarks. ### C++ Definition - -`fastdeploy::vision::FaceAlignmentResult` +```c++ +fastdeploy::vision::FaceAlignmentResult +``` ```c++ struct FaceAlignmentResult { @@ -261,7 +273,9 @@ The HeadPoseResult code is defined in `fastdeploy/vision/common/result.h`, and i ### C++ Definition -``fastdeploy::vision::HeadPoseResult`'' +```c++ +fastdeploy::vision::HeadPoseResult +``` ```c++ struct HeadPoseResult { diff --git a/docs/cn/build_and_install/huawei_ascend.md b/docs/cn/build_and_install/huawei_ascend.md index b95311a28..4e621e305 100644 --- a/docs/cn/build_and_install/huawei_ascend.md +++ b/docs/cn/build_and_install/huawei_ascend.md @@ -126,7 +126,7 @@ python setup.py bdist_wheel ## 五.昇腾部署时开启FlyCV [FlyCV](https://github.com/PaddlePaddle/FlyCV) 是一款高性能计算机图像处理库, 针对ARM架构做了很多优化, 相比其他图像处理库性能更为出色. FastDeploy现在已经集成FlyCV, 用户可以在支持的硬件平台上使用FlyCV, 实现模型端到端推理性能的加速. 
-模型端到端推理中, 预处理和后处理阶段为CPU计算, 当用户使用ARM CPU + 昇腾的硬件平台时, 我们推荐用户使用FlyCV, 可以实现端到端的推理性能加速, 详见[FLyCV使用文档](../faq/boost_cv_by_flycv.md). +模型端到端推理中, 预处理和后处理阶段为CPU计算, 当用户使用ARM CPU + 昇腾的硬件平台时, 我们推荐用户使用FlyCV, 可以实现端到端的推理性能加速, 详见[FlyCV使用文档](../faq/boost_cv_by_flycv.md). ## 六.昇腾部署Demo参考 diff --git a/docs/cn/faq/vision_result_related_problems.md b/docs/cn/faq/vision_result_related_problems.md index 59ce781d8..0426a2d18 100644 --- a/docs/cn/faq/vision_result_related_problems.md +++ b/docs/cn/faq/vision_result_related_problems.md @@ -4,7 +4,7 @@ ## 将视觉模型预测结果转换为numpy格式 这里以[SegmentationResult](./segmentation_result_CN.md)为例,展示如何抽取SegmentationResult中的label_map或者score_map来转为numpy格式,同时也可以利用已有数据new SegmentationResult结构体 -``` +``` python import fastdeploy as fd import cv2 import numpy as np diff --git a/examples/vision/segmentation/paddleseg/android/README.md b/examples/vision/segmentation/paddleseg/android/README.md index ab06e1868..ad363015b 100644 --- a/examples/vision/segmentation/paddleseg/android/README.md +++ b/examples/vision/segmentation/paddleseg/android/README.md @@ -1,5 +1,5 @@ English | [简体中文](README_CN.md) -# PaddleSeg Android Demo for Target Detection +# PaddleSeg Android Demo for Image Segmentation For real-time portrait segmentation on Android, this demo has good ease of use and openness. You can run your own training model in the demo. @@ -10,7 +10,7 @@ For real-time portrait segmentation on Android, this demo has good ease of use a ## Deployment Steps -1. Target detection PaddleSeg Demo is located in `fastdeploy/examples/vision/segmentation/paddleseg/android` directory. +1. Image Segmentation PaddleSeg Demo is located in `fastdeploy/examples/vision/segmentation/paddleseg/android` directory. 2. Please use Android Studio to open paddleseg/android project. 3. Connect your phone to your computer, turn on USB debugging and file transfer mode, and connect your own mobile device on Android Studio (your phone needs to be enabled to allow software installation from USB). diff --git a/examples/vision/segmentation/paddleseg/android/README_CN.md b/examples/vision/segmentation/paddleseg/android/README_CN.md index 30938b697..1414e8ae9 100644 --- a/examples/vision/segmentation/paddleseg/android/README_CN.md +++ b/examples/vision/segmentation/paddleseg/android/README_CN.md @@ -1,5 +1,5 @@ [English](README.md) | 简体中文 -# 目标检测 PaddleSeg Android Demo 使用文档 +# 图像分割 PaddleSeg Android Demo 使用文档 在 Android 上实现实时的人像分割功能,此 Demo 有很好的的易用性和开放性,如在 Demo 中跑自己训练好的模型等。 @@ -10,7 +10,7 @@ ## 部署步骤 -1. 目标检测 PaddleSeg Demo 位于 `path/to/paddleseg/android` 目录 +1. 图像分割 PaddleSeg Demo 位于 `path/to/paddleseg/android` 目录 2. 用 Android Studio 打开 paddleseg/android 工程 3. 
手机连接电脑,打开 USB 调试和文件传输模式,并在 Android Studio 上连接自己的手机设备(手机需要开启允许从 USB 安装软件权限) From 1d26cc59203772def073f5cbeccbb78d224b1d0a Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Mon, 13 Feb 2023 11:30:38 +0000 Subject: [PATCH 17/41] Delete redundant directory --- .../segmentation/paddleseg/cpp/README.md | 98 ------------------- 1 file changed, 98 deletions(-) delete mode 100755 examples/vision/segmentation/paddleseg/cpp/README.md diff --git a/examples/vision/segmentation/paddleseg/cpp/README.md b/examples/vision/segmentation/paddleseg/cpp/README.md deleted file mode 100755 index 572e38078..000000000 --- a/examples/vision/segmentation/paddleseg/cpp/README.md +++ /dev/null @@ -1,98 +0,0 @@ -English | [简体中文](README_CN.md) -# PaddleSeg C++ Deployment Example - -This directory provides examples that `infer.cc` fast finishes the deployment of Unet on CPU/GPU and GPU accelerated by TensorRT. - -Before deployment, two steps require confirmation - -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -【Attention】For the deployment of **PP-Matting**、**PP-HumanMatting** and **ModNet**, refer to [Matting Model Deployment](../../../matting) - -Taking the inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 1.0.0 or above (x.x.x>=1.0.0) is required to support this model. - -```bash -mkdir build -cd build -# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# Download Unet model files and test images -wget https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz -tar -xvf Unet_cityscapes_without_argmax_infer.tgz -wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png - - -# CPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 0 -# GPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 1 -# TensorRT inference on GPU -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 2 -# kunlunxin XPU inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 3 -# Huawei Ascend Inference -./infer_demo Unet_cityscapes_without_argmax_infer cityscapes_demo.png 4 -``` - -The visualized result after running is as follows -
-
- -The above command works for Linux or MacOS. For SDK use-pattern in Windows, refer to: -- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md) - -## PaddleSeg C++ Interface - -### PaddleSeg Class - -```c++ -fastdeploy::vision::segmentation::PaddleSegModel( - const string& model_file, - const string& params_file = "", - const string& config_file, - const RuntimeOption& runtime_option = RuntimeOption(), - const ModelFormat& model_format = ModelFormat::PADDLE) -``` - -PaddleSegModel model loading and initialization, among which model_file is the exported Paddle model format. - -**Parameter** - -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path -> * **config_file**(str): Inference deployment configuration file -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default, which is the default configuration -> * **model_format**(ModelFormat): Model format. Paddle format by default - -#### Predict Function - -> ```c++ -> PaddleSegModel::Predict(cv::Mat* im, DetectionResult* result) -> ``` -> -> Model prediction interface. Input images and output detection results. -> -> **Parameter** -> -> > * **im**: Input images in HWC or BGR format -> > * **result**: The segmentation result, including the predicted label of the segmentation and the corresponding probability of the label. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for the description of SegmentationResult - -### Class Member Variable -#### Pre-processing Parameter -Users can modify the following pre-processing parameters to their needs, which affects the final inference and deployment results - -> > * **is_vertical_screen**(bool): For PP-HumanSeg models, the input image is portrait, height greater than a width, by setting this parameter to`true` - -#### Post-processing Parameter -> > * **apply_softmax**(bool): The `apply_softmax` parameter is not specified when the model is exported. 
Set this parameter to `true` to normalize the probability result (score_map) of the predicted output segmentation label (label_map) - -- [Model Description](../../) -- [Python Deployment](../python) -- [Vision Model Prediction Results](../../../../../docs/api/vision_results/) -- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md) From e8ac9be57b0eefa80751c4d58d071b0b017092e2 Mon Sep 17 00:00:00 2001 From: huangjianhui <852142024@qq.com> Date: Mon, 13 Feb 2023 20:02:44 +0800 Subject: [PATCH 18/41] Update README.md --- examples/vision/segmentation/paddleseg/quantize/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/vision/segmentation/paddleseg/quantize/README.md b/examples/vision/segmentation/paddleseg/quantize/README.md index 708d21e93..632772247 100644 --- a/examples/vision/segmentation/paddleseg/quantize/README.md +++ b/examples/vision/segmentation/paddleseg/quantize/README.md @@ -12,6 +12,7 @@ FastDeploy 提供了一键模型自动化压缩工具, 能够简单地通过输 用户也可以直接下载下表中的量化模型进行部署.(点击模型名字即可下载) | 模型 | 量化方式 | +|:----- | :-- | | [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_QAT_new.tar) |量化蒸馏训练 | 量化后模型的Benchmark比较,请参考[量化模型 Benchmark](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/quantize.md) From f43ff327522c9b601abcf1d264f7b744799bd680 Mon Sep 17 00:00:00 2001 From: chenjian Date: Wed, 15 Feb 2023 14:27:31 +0800 Subject: [PATCH 19/41] [C# api] add c sharp api for fastdeploy (#1246) * add c sharp api for fastdeploy * update accroding to c apis * add cmakelist for c sharp api * add cmakelists for c sharp * fix cmakelists * fix cmakelists * add c sharp api for fastdeploy * add ppyoloe demo * add ppyoloe demo * modify demo namespace code * add readme * fix format * format code * fix doc --------- Co-authored-by: heliqi <1101791222@qq.com> --- CMakeLists.txt | 10 +- FastDeployCSharp.cmake | 14 + FastDeployCSharp.cmake.in | 13 + csharp/CMakeLists.txt | 58 ++ csharp/fastdeploy/enum_varaibles.cs | 53 ++ csharp/fastdeploy/runtime_option.cs | 541 ++++++++++++++++++ csharp/fastdeploy/types_internal_c.cs | 125 ++++ .../vision/classification/ppcls/model.cs | 100 ++++ .../vision/detection/ppdet/model.cs | 94 +++ csharp/fastdeploy/vision/result.cs | 272 +++++++++ csharp/fastdeploy/vision/visualize.cs | 45 ++ .../paddledetection/csharp/CMakeLists.txt | 23 + .../paddledetection/csharp/README.md | 97 ++++ .../paddledetection/csharp/README_CN.md | 99 ++++ .../paddledetection/csharp/infer_ppyoloe.cs | 57 ++ 15 files changed, 1600 insertions(+), 1 deletion(-) create mode 100644 FastDeployCSharp.cmake create mode 100644 FastDeployCSharp.cmake.in create mode 100644 csharp/CMakeLists.txt create mode 100644 csharp/fastdeploy/enum_varaibles.cs create mode 100644 csharp/fastdeploy/runtime_option.cs create mode 100644 csharp/fastdeploy/types_internal_c.cs create mode 100644 csharp/fastdeploy/vision/classification/ppcls/model.cs create mode 100644 csharp/fastdeploy/vision/detection/ppdet/model.cs create mode 100644 csharp/fastdeploy/vision/result.cs create mode 100644 csharp/fastdeploy/vision/visualize.cs create mode 100644 examples/vision/detection/paddledetection/csharp/CMakeLists.txt create mode 100644 examples/vision/detection/paddledetection/csharp/README.md create mode 100644 examples/vision/detection/paddledetection/csharp/README_CN.md create mode 100644 examples/vision/detection/paddledetection/csharp/infer_ppyoloe.cs diff --git a/CMakeLists.txt b/CMakeLists.txt index 
4f85653f2..fa99c6ff7 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,7 +74,7 @@ option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF) option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF) option(WITH_TESTING "Whether to compile with unittest." OFF) option(WITH_CAPI "Whether to compile with c api." OFF) - +option(WITH_CSHARPAPI "Whether to compile with c# api" OFF) ############################# Options for Android cross compiling ######################### if(ANDROID) option(WITH_OPENCV_STATIC "Whether to use OpenCV static lib for Android." OFF) @@ -424,8 +424,15 @@ if(WITH_CAPI) endif() endif() +if(WITH_CSHARPAPI) + if(MSVC) + add_subdirectory(${PROJECT_SOURCE_DIR}/csharp) + endif() +endif() + configure_file(${PROJECT_SOURCE_DIR}/FastDeploy.cmake.in ${PROJECT_SOURCE_DIR}/FastDeploy.cmake @ONLY) +configure_file(${PROJECT_SOURCE_DIR}/FastDeployCSharp.cmake.in ${PROJECT_SOURCE_DIR}/FastDeployCSharp.cmake @ONLY) configure_file(${PROJECT_SOURCE_DIR}/python/fastdeploy/c_lib_wrap.py.in ${PROJECT_SOURCE_DIR}/python/fastdeploy/c_lib_wrap.py) configure_file(${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py.in ${PROJECT_SOURCE_DIR}/python/scripts/process_libraries.py) @@ -678,6 +685,7 @@ install( ${PROJECT_SOURCE_DIR}/ThirdPartyNotices.txt ${PROJECT_SOURCE_DIR}/VERSION_NUMBER ${PROJECT_SOURCE_DIR}/FastDeploy.cmake + ${PROJECT_SOURCE_DIR}/FastDeployCSharp.cmake ${PROJECT_SOURCE_DIR}/cmake/FastDeployConfig.cmake ${PROJECT_SOURCE_DIR}/cmake/utils.cmake ${PROJECT_SOURCE_DIR}/cmake/openmp.cmake diff --git a/FastDeployCSharp.cmake b/FastDeployCSharp.cmake new file mode 100644 index 000000000..3a3708140 --- /dev/null +++ b/FastDeployCSharp.cmake @@ -0,0 +1,14 @@ +list(APPEND FASTDEPLOY_DOTNET_REFERENCES + "Microsoft.CSharp" + "System" + "System.Core" + "System.Data" + "System.Deployment" + "System.Drawing" + "System.Net.Http" + "System.Xml" + "System.Reflection" + "${CMAKE_CURRENT_LIST_DIR}/csharp_lib/fastdeploy_csharp.dll") + +set(FASTDEPLOY_PACKAGE_REFERENCES "OpenCvSharp4_4.7.0.20230115;OpenCvSharp4.runtime.win_4.7.0.20230115") + diff --git a/FastDeployCSharp.cmake.in b/FastDeployCSharp.cmake.in new file mode 100644 index 000000000..43f798b36 --- /dev/null +++ b/FastDeployCSharp.cmake.in @@ -0,0 +1,13 @@ +list(APPEND FASTDEPLOY_DOTNET_REFERENCES + "Microsoft.CSharp" + "System" + "System.Core" + "System.Data" + "System.Deployment" + "System.Drawing" + "System.Net.Http" + "System.Xml" + "System.Reflection" + "${CMAKE_CURRENT_LIST_DIR}/csharp_lib/fastdeploy_csharp.dll") + +set(FASTDEPLOY_PACKAGE_REFERENCES "OpenCvSharp4_4.7.0.20230115;OpenCvSharp4.runtime.win_4.7.0.20230115") diff --git a/csharp/CMakeLists.txt b/csharp/CMakeLists.txt new file mode 100644 index 000000000..1e305f474 --- /dev/null +++ b/csharp/CMakeLists.txt @@ -0,0 +1,58 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
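These reference lists are what a downstream CMake CSharp project consumes (the PaddleDetection C# example added later in this patch wires them into its own target). A minimal consumer-side sketch, assuming the generated fastdeploy_csharp.dll, the native fastdeploy.dll and the OpenCvSharp4 runtime are all resolvable at run time; the model and image paths below are placeholders, and the classes used (RuntimeOption, PPYOLOE) are defined in the csharp/fastdeploy sources that follow:

```csharp
// Hypothetical consumer program for the C# bindings built by this patch.
// Model/image paths are placeholders for illustration only.
using System;
using OpenCvSharp;
using fastdeploy;
using fastdeploy.vision.detection;

public class Demo {
  public static void Main(string[] args) {
    var option = new RuntimeOption();
    option.UseCpu();

    var model = new PPYOLOE("ppyoloe/model.pdmodel",     // placeholder model files
                            "ppyoloe/model.pdiparams",
                            "ppyoloe/infer_cfg.yml",
                            option);

    Mat image = Cv2.ImRead("test.jpg");                  // placeholder test image
    var result = model.Predict(image);
    Console.WriteLine($"Detected {result.boxes.Count} objects");
  }
}
```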
+ +##################################### Building: FastDeploy C# API ####################################### +PROJECT(fastdeploy_csharp CSharp) + +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +option(ENABLE_VISION "Whether to enable vision models usage." OFF) + +message("fastdeploy_csharp_SOURCE_DIR: ${fastdeploy_csharp_SOURCE_DIR}") +file(GLOB_RECURSE DEPLOY_CSHARPAPI_SRCS ${fastdeploy_csharp_SOURCE_DIR}/fastdeploy/*.cs) +if(NOT ENABLE_VISION) + file(GLOB_RECURSE DEPLOY_VISION_CSHARPAPI_SRCS ${fastdeploy_csharp_SOURCE_DIR}/fastdeploy/vision/*.cs) + list(REMOVE_ITEM DEPLOY_CSHARPAPI_SRCS ${DEPLOY_VISION_CSHARPAPI_SRCS}) +endif() + +# Define the DLL target, including all relevant project files. +add_library(${PROJECT_NAME} SHARED ${DEPLOY_CSHARPAPI_SRCS}) + +# Set the C# language version (defaults to 3.0 if not set). +set(CMAKE_CSharp_FLAGS "/langversion:10") +# Add in some .NET reference libraries. +set_property(TARGET ${PROJECT_NAME} PROPERTY VS_DOTNET_REFERENCES + "Microsoft.CSharp" + "System" + "System.Core" + "System.Data" + "System.Deployment" + "System.Drawing" + "System.Net.Http" + "System.Xml" +) + + +set_property(TARGET ${PROJECT_NAME} + PROPERTY VS_PACKAGE_REFERENCES "OpenCvSharp4_4.7.0.20230115" +) + +##################################### Installing: FastDeploy C# API ####################################### + +install( + TARGETS ${PROJECT_NAME} + LIBRARY DESTINATION csharp_lib + ARCHIVE DESTINATION csharp_lib + RUNTIME DESTINATION csharp_lib +) diff --git a/csharp/fastdeploy/enum_varaibles.cs b/csharp/fastdeploy/enum_varaibles.cs new file mode 100644 index 000000000..b0888b71c --- /dev/null +++ b/csharp/fastdeploy/enum_varaibles.cs @@ -0,0 +1,53 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace fastdeploy { + +public enum ModelFormat { + AUTOREC, ///< Auto recognize the model format by model file name + PADDLE, ///< Model with paddlepaddle format + ONNX, ///< Model with ONNX format + RKNN, ///< Model with RKNN format + TORCHSCRIPT, ///< Model with TorchScript format + SOPHGO, ///< Model with SOPHGO format +} + +public enum rknpu2_CpuName { + RK356X = 0, /* run on RK356X. */ + RK3588 = 1, /* default,run on RK3588. */ + UNDEFINED, +} + +public enum rknpu2_CoreMask { + RKNN_NPU_CORE_AUTO = 0, //< default, run on NPU core randomly. + RKNN_NPU_CORE_0 = 1, //< run on NPU core 0. + RKNN_NPU_CORE_1 = 2, //< run on NPU core 1. + RKNN_NPU_CORE_2 = 4, //< run on NPU core 2. + RKNN_NPU_CORE_0_1 = + RKNN_NPU_CORE_0 | RKNN_NPU_CORE_1, //< run on NPU core 1 and core 2. + RKNN_NPU_CORE_0_1_2 = + RKNN_NPU_CORE_0_1 | RKNN_NPU_CORE_2, //< run on NPU core 1 and core 2. 
+ RKNN_NPU_CORE_UNDEFINED, +} + +public enum LitePowerMode { + LITE_POWER_HIGH = 0, ///< Use Lite Backend with high power mode + LITE_POWER_LOW = 1, ///< Use Lite Backend with low power mode + LITE_POWER_FULL = 2, ///< Use Lite Backend with full power mode + LITE_POWER_NO_BIND = 3, ///< Use Lite Backend with no bind power mode + LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode + LITE_POWER_RAND_LOW = 5 ///< Use Lite Backend with rand low power mode +} + +} \ No newline at end of file diff --git a/csharp/fastdeploy/runtime_option.cs b/csharp/fastdeploy/runtime_option.cs new file mode 100644 index 000000000..781365558 --- /dev/null +++ b/csharp/fastdeploy/runtime_option.cs @@ -0,0 +1,541 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.IO; +using System.Runtime.InteropServices; + +namespace fastdeploy { + +public class RuntimeOption { + + public RuntimeOption() { + fd_runtime_option_wrapper = FD_C_CreateRuntimeOptionWrapper(); + } + + ~RuntimeOption() { + FD_C_DestroyRuntimeOptionWrapper(fd_runtime_option_wrapper); + } + + public void SetModelPath(string model_path, string params_path = "", + ModelFormat format = ModelFormat.PADDLE) { + FD_C_RuntimeOptionWrapperSetModelPath(fd_runtime_option_wrapper, model_path, + params_path, format); + } + + public void SetModelBuffer(string model_buffer, string params_buffer = "", + ModelFormat format = ModelFormat.PADDLE) { + FD_C_RuntimeOptionWrapperSetModelBuffer( + fd_runtime_option_wrapper, model_buffer, params_buffer, format); + } + + public void UseCpu() { + FD_C_RuntimeOptionWrapperUseCpu(fd_runtime_option_wrapper); + } + + public void UseGpu(int gpu_id = 0) { + FD_C_RuntimeOptionWrapperUseGpu(fd_runtime_option_wrapper, gpu_id); + } + + public void + UseRKNPU2(rknpu2_CpuName rknpu2_name = rknpu2_CpuName.RK3588, + rknpu2_CoreMask rknpu2_core = rknpu2_CoreMask.RKNN_NPU_CORE_0) { + FD_C_RuntimeOptionWrapperUseRKNPU2(fd_runtime_option_wrapper, rknpu2_name, + rknpu2_core); + } + + public void UseTimVX() { + FD_C_RuntimeOptionWrapperUseTimVX(fd_runtime_option_wrapper); + } + + public void UseAscend() { + FD_C_RuntimeOptionWrapperUseAscend(fd_runtime_option_wrapper); + } + + public void + UseKunlunXin(int kunlunxin_id = 0, int l3_workspace_size = 0xfffc00, + bool locked = false, bool autotune = true, + string autotune_file = "", string precision = "int16", + bool adaptive_seqlen = false, bool enable_multi_stream = false) { + FD_C_RuntimeOptionWrapperUseKunlunXin( + fd_runtime_option_wrapper, kunlunxin_id, l3_workspace_size, locked, + autotune, autotune_file, precision, adaptive_seqlen, + enable_multi_stream); + } + + public void UseSophgo() { + FD_C_RuntimeOptionWrapperUseSophgo(fd_runtime_option_wrapper); + } + + public void SetExternalStream(IntPtr external_stream) { + FD_C_RuntimeOptionWrapperSetExternalStream(fd_runtime_option_wrapper, + external_stream); + } + + public void SetCpuThreadNum(int thread_num) { + 
FD_C_RuntimeOptionWrapperSetCpuThreadNum(fd_runtime_option_wrapper, + thread_num); + } + + public void SetOrtGraphOptLevel(int level = -1) { + FD_C_RuntimeOptionWrapperSetOrtGraphOptLevel(fd_runtime_option_wrapper, + level); + } + + public void UsePaddleBackend() { + FD_C_RuntimeOptionWrapperUsePaddleBackend(fd_runtime_option_wrapper); + } + + public void UsePaddleInferBackend() { + FD_C_RuntimeOptionWrapperUsePaddleInferBackend(fd_runtime_option_wrapper); + } + + public void UseOrtBackend() { + FD_C_RuntimeOptionWrapperUseOrtBackend(fd_runtime_option_wrapper); + } + + public void UseSophgoBackend() { + FD_C_RuntimeOptionWrapperUseSophgoBackend(fd_runtime_option_wrapper); + } + + public void UseTrtBackend() { + FD_C_RuntimeOptionWrapperUseTrtBackend(fd_runtime_option_wrapper); + } + + public void UsePorosBackend() { + FD_C_RuntimeOptionWrapperUsePorosBackend(fd_runtime_option_wrapper); + } + + public void UseOpenVINOBackend() { + FD_C_RuntimeOptionWrapperUseOpenVINOBackend(fd_runtime_option_wrapper); + } + + public void UseLiteBackend() { + FD_C_RuntimeOptionWrapperUseLiteBackend(fd_runtime_option_wrapper); + } + + public void UsePaddleLiteBackend() { + FD_C_RuntimeOptionWrapperUsePaddleLiteBackend(fd_runtime_option_wrapper); + } + + public void SetPaddleMKLDNN(bool pd_mkldnn = true) { + FD_C_RuntimeOptionWrapperSetPaddleMKLDNN(fd_runtime_option_wrapper, + pd_mkldnn); + } + + public void EnablePaddleToTrt() { + FD_C_RuntimeOptionWrapperEnablePaddleToTrt(fd_runtime_option_wrapper); + } + + public void DeletePaddleBackendPass(string delete_pass_name) { + FD_C_RuntimeOptionWrapperDeletePaddleBackendPass(fd_runtime_option_wrapper, + delete_pass_name); + } + + public void EnablePaddleLogInfo() { + FD_C_RuntimeOptionWrapperEnablePaddleLogInfo(fd_runtime_option_wrapper); + } + + public void DisablePaddleLogInfo() { + FD_C_RuntimeOptionWrapperDisablePaddleLogInfo(fd_runtime_option_wrapper); + } + + public void SetPaddleMKLDNNCacheSize(int size) { + FD_C_RuntimeOptionWrapperSetPaddleMKLDNNCacheSize(fd_runtime_option_wrapper, + size); + } + + public void SetOpenVINODevice(string name = "CPU") { + FD_C_RuntimeOptionWrapperSetOpenVINODevice(fd_runtime_option_wrapper, name); + } + + public void SetLiteOptimizedModelDir(string optimized_model_dir) { + FD_C_RuntimeOptionWrapperSetLiteOptimizedModelDir(fd_runtime_option_wrapper, + optimized_model_dir); + } + + public void SetLiteSubgraphPartitionPath( + string nnadapter_subgraph_partition_config_path) { + FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionPath( + fd_runtime_option_wrapper, nnadapter_subgraph_partition_config_path); + } + + public void SetLiteSubgraphPartitionConfigBuffer( + string nnadapter_subgraph_partition_config_buffer) { + FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionConfigBuffer( + fd_runtime_option_wrapper, nnadapter_subgraph_partition_config_buffer); + } + + public void SetLiteContextProperties(string nnadapter_context_properties) { + FD_C_RuntimeOptionWrapperSetLiteContextProperties( + fd_runtime_option_wrapper, nnadapter_context_properties); + } + + public void SetLiteModelCacheDir(string nnadapter_model_cache_dir) { + FD_C_RuntimeOptionWrapperSetLiteModelCacheDir(fd_runtime_option_wrapper, + nnadapter_model_cache_dir); + } + + public void SetLiteMixedPrecisionQuantizationConfigPath( + string nnadapter_mixed_precision_quantization_config_path) { + FD_C_RuntimeOptionWrapperSetLiteMixedPrecisionQuantizationConfigPath( + fd_runtime_option_wrapper, + nnadapter_mixed_precision_quantization_config_path); + } + + public void 
EnableLiteFP16() { + FD_C_RuntimeOptionWrapperEnableLiteFP16(fd_runtime_option_wrapper); + } + + public void DisableLiteFP16() { + FD_C_RuntimeOptionWrapperDisableLiteFP16(fd_runtime_option_wrapper); + } + + public void EnableLiteInt8() { + FD_C_RuntimeOptionWrapperEnableLiteInt8(fd_runtime_option_wrapper); + } + + public void DisableLiteInt8() { + FD_C_RuntimeOptionWrapperDisableLiteInt8(fd_runtime_option_wrapper); + } + + public void SetLitePowerMode(LitePowerMode mode) { + FD_C_RuntimeOptionWrapperSetLitePowerMode(fd_runtime_option_wrapper, mode); + } + + public void EnableTrtFP16() { + FD_C_RuntimeOptionWrapperEnableTrtFP16(fd_runtime_option_wrapper); + } + + public void DisableTrtFP16() { + FD_C_RuntimeOptionWrapperDisableTrtFP16(fd_runtime_option_wrapper); + } + + public void SetTrtCacheFile(string cache_file_path) { + FD_C_RuntimeOptionWrapperSetTrtCacheFile(fd_runtime_option_wrapper, + cache_file_path); + } + + public void EnablePinnedMemory() { + FD_C_RuntimeOptionWrapperEnablePinnedMemory(fd_runtime_option_wrapper); + } + + public void DisablePinnedMemory() { + FD_C_RuntimeOptionWrapperDisablePinnedMemory(fd_runtime_option_wrapper); + } + + public void EnablePaddleTrtCollectShape() { + FD_C_RuntimeOptionWrapperEnablePaddleTrtCollectShape( + fd_runtime_option_wrapper); + } + + public void DisablePaddleTrtCollectShape() { + FD_C_RuntimeOptionWrapperDisablePaddleTrtCollectShape( + fd_runtime_option_wrapper); + } + + public void SetOpenVINOStreams(int num_streams) { + FD_C_RuntimeOptionWrapperSetOpenVINOStreams(fd_runtime_option_wrapper, + num_streams); + } + + public void UseIpu(int device_num = 1, int micro_batch_size = 1, + bool enable_pipelining = false, int batches_per_step = 1) { + FD_C_RuntimeOptionWrapperUseIpu(fd_runtime_option_wrapper, device_num, + micro_batch_size, enable_pipelining, + batches_per_step); + } + + public IntPtr GetWrapperPtr() { return fd_runtime_option_wrapper; } + + // Below are underlying C api + private IntPtr fd_runtime_option_wrapper; + + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreateRuntimeOptionWrapper")] + private static extern IntPtr FD_C_CreateRuntimeOptionWrapper(); + + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyRuntimeOptionWrapper")] + private static extern void + FD_C_DestroyRuntimeOptionWrapper(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetModelPath")] + private static extern void + FD_C_RuntimeOptionWrapperSetModelPath(IntPtr fd_runtime_option_wrapper, + string model_path, string params_path, + ModelFormat format); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetModelBuffer")] + private static extern void FD_C_RuntimeOptionWrapperSetModelBuffer( + IntPtr fd_runtime_option_wrapper, string model_buffer, + string params_buffer, ModelFormat format); + + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_RuntimeOptionWrapperUseCpu")] + private static extern void + FD_C_RuntimeOptionWrapperUseCpu(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_RuntimeOptionWrapperUseGpu")] + private static extern void + FD_C_RuntimeOptionWrapperUseGpu(IntPtr fd_runtime_option_wrapper, int gpu_id); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUseRKNPU2")] + private static extern void + FD_C_RuntimeOptionWrapperUseRKNPU2(IntPtr fd_runtime_option_wrapper, + rknpu2_CpuName rknpu2_name, + rknpu2_CoreMask rknpu2_core); + + [DllImport("fastdeploy.dll", + EntryPoint = 
"FD_C_RuntimeOptionWrapperUseTimVX")] + private static extern void + FD_C_RuntimeOptionWrapperUseTimVX(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUseAscend")] + private static extern void + FD_C_RuntimeOptionWrapperUseAscend(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUseKunlunXin")] + private static extern void FD_C_RuntimeOptionWrapperUseKunlunXin( + IntPtr fd_runtime_option_wrapper, int kunlunxin_id, int l3_workspace_size, + bool locked, bool autotune, string autotune_file, string precision, + bool adaptive_seqlen, bool enable_multi_stream); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUseSophgo")] + private static extern void + FD_C_RuntimeOptionWrapperUseSophgo(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetExternalStream")] + private static extern void + FD_C_RuntimeOptionWrapperSetExternalStream(IntPtr fd_runtime_option_wrapper, + IntPtr external_stream); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetCpuThreadNum")] + private static extern void + FD_C_RuntimeOptionWrapperSetCpuThreadNum(IntPtr fd_runtime_option_wrapper, + int thread_num); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetOrtGraphOptLevel")] + private static extern void + FD_C_RuntimeOptionWrapperSetOrtGraphOptLevel(IntPtr fd_runtime_option_wrapper, + int level); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUsePaddleBackend")] + private static extern void + FD_C_RuntimeOptionWrapperUsePaddleBackend(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUsePaddleInferBackend")] + private static extern void FD_C_RuntimeOptionWrapperUsePaddleInferBackend( + IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUseOrtBackend")] + private static extern void + FD_C_RuntimeOptionWrapperUseOrtBackend(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUseSophgoBackend")] + private static extern void + FD_C_RuntimeOptionWrapperUseSophgoBackend(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUseTrtBackend")] + private static extern void + FD_C_RuntimeOptionWrapperUseTrtBackend(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUsePorosBackend")] + private static extern void + FD_C_RuntimeOptionWrapperUsePorosBackend(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUseOpenVINOBackend")] + private static extern void + FD_C_RuntimeOptionWrapperUseOpenVINOBackend(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUseLiteBackend")] + private static extern void + FD_C_RuntimeOptionWrapperUseLiteBackend(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperUsePaddleLiteBackend")] + private static extern void FD_C_RuntimeOptionWrapperUsePaddleLiteBackend( + IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetPaddleMKLDNN")] + private static extern void + FD_C_RuntimeOptionWrapperSetPaddleMKLDNN(IntPtr 
fd_runtime_option_wrapper, + bool pd_mkldnn); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperEnablePaddleToTrt")] + private static extern void + FD_C_RuntimeOptionWrapperEnablePaddleToTrt(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperDeletePaddleBackendPass")] + private static extern void FD_C_RuntimeOptionWrapperDeletePaddleBackendPass( + IntPtr fd_runtime_option_wrapper, string delete_pass_name); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperEnablePaddleLogInfo")] + private static extern void FD_C_RuntimeOptionWrapperEnablePaddleLogInfo( + IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperDisablePaddleLogInfo")] + private static extern void FD_C_RuntimeOptionWrapperDisablePaddleLogInfo( + IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetPaddleMKLDNNCacheSize")] + private static extern void FD_C_RuntimeOptionWrapperSetPaddleMKLDNNCacheSize( + IntPtr fd_runtime_option_wrapper, int size); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetOpenVINODevice")] + private static extern void + FD_C_RuntimeOptionWrapperSetOpenVINODevice(IntPtr fd_runtime_option_wrapper, + string name); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetLiteOptimizedModelDir")] + private static extern void FD_C_RuntimeOptionWrapperSetLiteOptimizedModelDir( + IntPtr fd_runtime_option_wrapper, string optimized_model_dir); + + [DllImport("fastdeploy.dll", + EntryPoint = + "FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionPath")] + private static extern void + FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionPath( + IntPtr fd_runtime_option_wrapper, + string nnadapter_subgraph_partition_config_path); + + [DllImport( + "fastdeploy.dll", + EntryPoint = + "FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionConfigBuffer")] + private static extern void + FD_C_RuntimeOptionWrapperSetLiteSubgraphPartitionConfigBuffer( + IntPtr fd_runtime_option_wrapper, + string nnadapter_subgraph_partition_config_buffer); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetLiteContextProperties")] + private static extern void FD_C_RuntimeOptionWrapperSetLiteContextProperties( + IntPtr fd_runtime_option_wrapper, string nnadapter_context_properties); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetLiteModelCacheDir")] + private static extern void FD_C_RuntimeOptionWrapperSetLiteModelCacheDir( + IntPtr fd_runtime_option_wrapper, string nnadapter_model_cache_dir); + + [DllImport( + "fastdeploy.dll", + EntryPoint = + "FD_C_RuntimeOptionWrapperSetLiteMixedPrecisionQuantizationConfigPath")] + private static extern void + FD_C_RuntimeOptionWrapperSetLiteMixedPrecisionQuantizationConfigPath( + IntPtr fd_runtime_option_wrapper, + string nnadapter_mixed_precision_quantization_config_path); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperEnableLiteFP16")] + private static extern void + FD_C_RuntimeOptionWrapperEnableLiteFP16(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperDisableLiteFP16")] + private static extern void + FD_C_RuntimeOptionWrapperDisableLiteFP16(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperEnableLiteInt8")] + private static extern void + 
FD_C_RuntimeOptionWrapperEnableLiteInt8(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperDisableLiteInt8")] + private static extern void + FD_C_RuntimeOptionWrapperDisableLiteInt8(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetLitePowerMode")] + private static extern void + FD_C_RuntimeOptionWrapperSetLitePowerMode(IntPtr fd_runtime_option_wrapper, + LitePowerMode mode); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperEnableTrtFP16")] + private static extern void + FD_C_RuntimeOptionWrapperEnableTrtFP16(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperDisableTrtFP16")] + private static extern void + FD_C_RuntimeOptionWrapperDisableTrtFP16(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetTrtCacheFile")] + private static extern void + FD_C_RuntimeOptionWrapperSetTrtCacheFile(IntPtr fd_runtime_option_wrapper, + string cache_file_path); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperEnablePinnedMemory")] + private static extern void + FD_C_RuntimeOptionWrapperEnablePinnedMemory(IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperDisablePinnedMemory")] + private static extern void FD_C_RuntimeOptionWrapperDisablePinnedMemory( + IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = + "FD_C_RuntimeOptionWrapperEnablePaddleTrtCollectShape")] + private static extern void + FD_C_RuntimeOptionWrapperEnablePaddleTrtCollectShape( + IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = + "FD_C_RuntimeOptionWrapperDisablePaddleTrtCollectShape")] + private static extern void + FD_C_RuntimeOptionWrapperDisablePaddleTrtCollectShape( + IntPtr fd_runtime_option_wrapper); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RuntimeOptionWrapperSetOpenVINOStreams")] + private static extern void + FD_C_RuntimeOptionWrapperSetOpenVINOStreams(IntPtr fd_runtime_option_wrapper, + int num_streams); + + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_RuntimeOptionWrapperUseIpu")] + private static extern void + FD_C_RuntimeOptionWrapperUseIpu(IntPtr fd_runtime_option_wrapper, + int device_num, int micro_batch_size, + bool enable_pipelining, int batches_per_step); +} +} diff --git a/csharp/fastdeploy/types_internal_c.cs b/csharp/fastdeploy/types_internal_c.cs new file mode 100644 index 000000000..d1274e28b --- /dev/null +++ b/csharp/fastdeploy/types_internal_c.cs @@ -0,0 +1,125 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
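A short usage sketch for the RuntimeOption wrapper defined above. Each call forwards to the corresponding FD_C_RuntimeOptionWrapper* entry point in the native fastdeploy.dll, so that library must be loadable at run time; the TensorRT cache file name below is a placeholder:

```csharp
// Sketch: two common RuntimeOption configurations, assuming the native
// fastdeploy.dll can be resolved by the process.
using System;
using fastdeploy;

public class RuntimeOptionExample {
  public static void Main() {
    // CPU path: ONNX Runtime backend with a fixed thread count.
    var cpuOption = new RuntimeOption();
    cpuOption.UseCpu();
    cpuOption.SetCpuThreadNum(4);
    cpuOption.UseOrtBackend();

    // GPU path: TensorRT backend with FP16 and a serialized engine cache.
    var gpuOption = new RuntimeOption();
    gpuOption.UseGpu(0);
    gpuOption.UseTrtBackend();
    gpuOption.EnableTrtFP16();
    gpuOption.SetTrtCacheFile("trt_engine.cache");   // placeholder cache file name

    Console.WriteLine("RuntimeOption instances configured.");
  }
}
```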
+ +using System; +using System.IO; +using System.Runtime.InteropServices; +using fastdeploy.vision; + +namespace fastdeploy { +namespace types_internal_c { + +[StructLayout(LayoutKind.Sequential)] +public struct FD_OneDimArrayUint8 { + public nuint size; + public IntPtr data; // byte[] +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_OneDimArrayInt32 { + public nuint size; + public IntPtr data; // int[] +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_OneDimArraySize { + public nuint size; + public IntPtr data; // nuint[] +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_OneDimArrayInt64 { + public nuint size; + public IntPtr data; // long[] +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_OneDimArrayFloat { + public nuint size; + public IntPtr data; // float[] +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_Cstr { + public nuint size; + public string data; +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_OneDimArrayCstr { + public nuint size; + public IntPtr data; // FD_Cstr[] +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_TwoDimArraySize { + public nuint size; + public IntPtr data; // FD_OneDimArraySize[] +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_TwoDimArrayFloat { + public nuint size; + public IntPtr data; // FD_OneDimArrayFloat[] +} + +public enum FD_ResultType { + UNKNOWN_RESULT, + CLASSIFY, + DETECTION, + SEGMENTATION, + OCR, + MOT, + FACE_DETECTION, + FACE_ALIGNMENT, + FACE_RECOGNITION, + MATTING, + MASK, + KEYPOINT_DETECTION, + HEADPOSE +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_ClassifyResult { + public FD_OneDimArrayInt32 label_ids; + public FD_OneDimArrayFloat scores; + public FD_ResultType type; +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_Mask { + public FD_OneDimArrayUint8 data; + public FD_OneDimArrayInt64 shape; + public FD_ResultType type; +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_OneDimMask { + public nint size; + public IntPtr data; // FD_Mask* +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_DetectionResult { + public FD_TwoDimArrayFloat boxes; + public FD_OneDimArrayFloat scores; + public FD_OneDimArrayInt32 label_ids; + public FD_OneDimMask masks; + [MarshalAs(UnmanagedType.U1)] + public bool contain_masks; + public FD_ResultType type; +} + +} +} diff --git a/csharp/fastdeploy/vision/classification/ppcls/model.cs b/csharp/fastdeploy/vision/classification/ppcls/model.cs new file mode 100644 index 000000000..4217c77a6 --- /dev/null +++ b/csharp/fastdeploy/vision/classification/ppcls/model.cs @@ -0,0 +1,100 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
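The FD_* structs above are sequential-layout mirrors of the C result types, so every IntPtr data field has to be copied into a managed array before it can be used from C#. A minimal round-trip sketch for FD_OneDimArrayFloat, using a locally allocated buffer purely for illustration (in real use the buffer comes from the native library, and the ConvertResult helpers added later in this patch perform these copies for ClassifyResult and DetectionResult):

```csharp
// Sketch: copying an FD_OneDimArrayFloat between unmanaged and managed memory.
// The unmanaged buffer is faked with AllocHGlobal for illustration only.
using System;
using System.Runtime.InteropServices;
using fastdeploy.types_internal_c;

public class MarshalExample {
  public static float[] ToManaged(FD_OneDimArrayFloat native) {
    int length = (int)native.size;
    var managed = new float[length];
    Marshal.Copy(native.data, managed, 0, length);
    return managed;
  }

  public static void Main() {
    float[] src = { 0.9f, 0.75f, 0.6f };   // e.g. detection scores
    var native = new FD_OneDimArrayFloat {
      size = (nuint)src.Length,
      data = Marshal.AllocHGlobal(sizeof(float) * src.Length)
    };
    Marshal.Copy(src, 0, native.data, src.Length);   // managed -> unmanaged

    float[] roundTrip = ToManaged(native);           // unmanaged -> managed
    Console.WriteLine(string.Join(", ", roundTrip));

    Marshal.FreeHGlobal(native.data);                // release the fake buffer
  }
}
```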
+ +using System; +using System.IO; +using System.Runtime.InteropServices; +using System.Collections.Generic; +using OpenCvSharp; +using fastdeploy.types_internal_c; + +namespace fastdeploy { +namespace vision { +namespace classification { + +class PaddleClasModel { + + public PaddleClasModel(string model_file, string params_file, + string config_file, RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_paddleclas_model_wrapper = FD_C_CreatePaddleClasModelWrapper( + model_file, params_file, config_file, custom_option.GetWrapperPtr(), + model_format); + } + + ~PaddleClasModel() { + FD_C_DestroyPaddleClasModelWrapper(fd_paddleclas_model_wrapper); + } + + public ClassifyResult Predict(Mat img) { + IntPtr fd_classify_result_wrapper_ptr = FD_C_CreateClassifyResultWrapper(); + FD_C_PaddleClasModelWrapperPredict( + fd_paddleclas_model_wrapper, img.CvPtr, + fd_classify_result_wrapper_ptr); // predict + IntPtr fd_classify_result_ptr = FD_C_ClassifyResultWrapperGetData( + fd_classify_result_wrapper_ptr); // get result from wrapper + FD_ClassifyResult fd_classify_result = + (FD_ClassifyResult)Marshal.PtrToStructure(fd_classify_result_ptr, + typeof(FD_ClassifyResult)); + ClassifyResult classify_result = + ConvertResult.ConvertCResultToClassifyResult(fd_classify_result); + FD_C_DestroyClassifyResultWrapper( + fd_classify_result_wrapper_ptr); // free fd_classify_result_wrapper_ptr + FD_C_DestroyClassifyResult( + fd_classify_result_ptr); // free fd_classify_result_ptr + return classify_result; + } + + // below are underlying C api + private IntPtr fd_paddleclas_model_wrapper; + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreatePaddleClasModelWrapper")] + private static extern IntPtr FD_C_CreatePaddleClasModelWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyPaddleClasModelWrapper")] + private static extern void + FD_C_DestroyPaddleClasModelWrapper(IntPtr fd_paddleclas_model_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleClasModelWrapperPredict")] + private static extern bool + FD_C_PaddleClasModelWrapperPredict(IntPtr fd_paddleclas_model_wrapper, + IntPtr img, + IntPtr fd_classify_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreateClassifyResultWrapper")] + private static extern IntPtr FD_C_CreateClassifyResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyClassifyResultWrapper")] + private static extern void + FD_C_DestroyClassifyResultWrapper(IntPtr fd_classify_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyClassifyResult")] + private static extern void + FD_C_DestroyClassifyResult(IntPtr fd_classify_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_ClassifyResultWrapperGetData")] + private static extern IntPtr + FD_C_ClassifyResultWrapperGetData(IntPtr fd_classify_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateClassifyResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateClassifyResultWrapperFromData(IntPtr fd_classify_result); +} + +} +} +} \ No newline at end of file diff --git a/csharp/fastdeploy/vision/detection/ppdet/model.cs b/csharp/fastdeploy/vision/detection/ppdet/model.cs new file mode 100644 index 000000000..e675746ad --- /dev/null +++ b/csharp/fastdeploy/vision/detection/ppdet/model.cs @@ 
-0,0 +1,94 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.IO; +using System.Runtime.InteropServices; +using System.Collections.Generic; +using OpenCvSharp; +using fastdeploy.types_internal_c; + +namespace fastdeploy { +namespace vision { +namespace detection { + +public class PPYOLOE { + + public PPYOLOE(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_ppyoloe_wrapper = + FD_C_CreatesPPYOLOEWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~PPYOLOE() { FD_C_DestroyPPYOLOEWrapper(fd_ppyoloe_wrapper); } + + public DetectionResult Predict(Mat img) { + IntPtr fd_detection_result_wrapper_ptr = + FD_C_CreateDetectionResultWrapper(); + FD_C_PPYOLOEWrapperPredict(fd_ppyoloe_wrapper, img.CvPtr, + fd_detection_result_wrapper_ptr); // predict + IntPtr fd_detection_result_ptr = FD_C_DetectionResultWrapperGetData( + fd_detection_result_wrapper_ptr); // get result from wrapper + FD_DetectionResult fd_detection_result = + (FD_DetectionResult)Marshal.PtrToStructure(fd_detection_result_ptr, + typeof(FD_DetectionResult)); + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + FD_C_DestroyDetectionResultWrapper( + fd_detection_result_wrapper_ptr); // free fd_detection_result_wrapper_ptr + FD_C_DestroyDetectionResult( + fd_detection_result_ptr); // free fd_detection_result_ptr + return detection_result; + } + + // below are underlying C api + private IntPtr fd_ppyoloe_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesPPYOLOEWrapper")] + private static extern IntPtr FD_C_CreatesPPYOLOEWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyPPYOLOEWrapper")] + private static extern void + FD_C_DestroyPPYOLOEWrapper(IntPtr fd_ppyoloe_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_PPYOLOEWrapperPredict")] + private static extern bool + FD_C_PPYOLOEWrapperPredict(IntPtr fd_ppyoloe_wrapper, IntPtr img, + IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + 
FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); +} +} +} +} \ No newline at end of file diff --git a/csharp/fastdeploy/vision/result.cs b/csharp/fastdeploy/vision/result.cs new file mode 100644 index 000000000..5b137bc86 --- /dev/null +++ b/csharp/fastdeploy/vision/result.cs @@ -0,0 +1,272 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.IO; +using System.Runtime.InteropServices; +using System.Collections.Generic; +using fastdeploy.types_internal_c; + +namespace fastdeploy { +namespace vision { + +public enum ResultType { + UNKNOWN_RESULT, + CLASSIFY, + DETECTION, + SEGMENTATION, + OCR, + MOT, + FACE_DETECTION, + FACE_ALIGNMENT, + FACE_RECOGNITION, + MATTING, + MASK, + KEYPOINT_DETECTION, + HEADPOSE +} + +public struct Mask { + public List data; + public List shape; + public ResultType type; + public Mask() { + this.data = new List(); + this.shape = new List(); + this.type = ResultType.MASK; + } +} + +public struct ClassifyResult { + public List label_ids; + public List scores; + public ResultType type; + public ClassifyResult() { + this.label_ids = new List(); + this.scores = new List(); + this.type = ResultType.CLASSIFY; + } +} + +public struct DetectionResult { + public List boxes; + public List scores; + public List label_ids; + public List masks; + public bool contain_masks; + public ResultType type; + public DetectionResult() { + this.boxes = new List(); + this.scores = new List(); + this.label_ids = new List(); + this.masks = new List(); + this.contain_masks = false; + this.type = ResultType.DETECTION; + } +} + +public class ConvertResult { + + public static FD_ClassifyResult + ConvertClassifyResultToCResult(ClassifyResult classify_result) { + FD_ClassifyResult fd_classify_result = new FD_ClassifyResult(); + + // copy label_ids + // Create a managed array + fd_classify_result.label_ids.size = (uint)classify_result.label_ids.Count; + int[] label_ids = new int[fd_classify_result.label_ids.size]; + // Copy data from Link to Array + classify_result.label_ids.CopyTo(label_ids); + // Copy data to unmanaged memory + int size = Marshal.SizeOf(label_ids[0]) * label_ids.Length; + fd_classify_result.label_ids.data = Marshal.AllocHGlobal(size); + Marshal.Copy(label_ids, 0, fd_classify_result.label_ids.data, + label_ids.Length); + + // copy scores + // Create a managed array + fd_classify_result.scores.size = (uint)classify_result.scores.Count; + float[] scores = new float[fd_classify_result.scores.size]; + // Copy data from Link to Array + classify_result.scores.CopyTo(scores); + // Copy data to unmanaged memory + size = Marshal.SizeOf(scores[0]) * scores.Length; + fd_classify_result.scores.data = Marshal.AllocHGlobal(size); + Marshal.Copy(scores, 0, 
fd_classify_result.scores.data, scores.Length); + + fd_classify_result.type = (FD_ResultType)classify_result.type; + + return fd_classify_result; + } + + public static ClassifyResult + ConvertCResultToClassifyResult(FD_ClassifyResult fd_classify_result) { + ClassifyResult classify_result = new ClassifyResult(); + + // copy label_ids + int[] label_ids = new int[fd_classify_result.label_ids.size]; + Marshal.Copy(fd_classify_result.label_ids.data, label_ids, 0, + label_ids.Length); + classify_result.label_ids = new List(label_ids); + + // copy scores + float[] scores = new float[fd_classify_result.scores.size]; + Marshal.Copy(fd_classify_result.scores.data, scores, 0, scores.Length); + classify_result.scores = new List(scores); + + classify_result.type = (ResultType)fd_classify_result.type; + return classify_result; + } + + public static FD_DetectionResult + ConvertDetectionResultToCResult(DetectionResult detection_result) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + + // copy boxes + int boxes_coordinate_dim = 4; + int size; + fd_detection_result.boxes.size = (uint)detection_result.boxes.Count; + FD_OneDimArraySize[] boxes = + new FD_OneDimArraySize[fd_detection_result.boxes.size]; + // Copy each box + for (int i = 0; i < (int)fd_detection_result.boxes.size; i++) { + boxes[i].size = (uint)detection_result.boxes[i].Length; + float[] boxes_i = new float[boxes_coordinate_dim]; + detection_result.boxes[i].CopyTo(boxes_i, 0); + size = Marshal.SizeOf(boxes_i[0]) * boxes_i.Length; + boxes[i].data = Marshal.AllocHGlobal(size); + Marshal.Copy(boxes_i, 0, boxes[i].data, boxes_i.Length); + } + // Copy data to unmanaged memory + size = Marshal.SizeOf(boxes[0]) * boxes.Length; + fd_detection_result.boxes.data = Marshal.AllocHGlobal(size); + for (int i = 0; i < boxes.Length; i++) { + Marshal.StructureToPtr( + boxes[i], + fd_detection_result.boxes.data + i * Marshal.SizeOf(boxes[0]), true); + } + + // copy scores + fd_detection_result.scores.size = (uint)detection_result.scores.Count; + float[] scores = new float[fd_detection_result.scores.size]; + // Copy data from Link to Array + detection_result.scores.CopyTo(scores); + // Copy data to unmanaged memory + size = Marshal.SizeOf(scores[0]) * scores.Length; + fd_detection_result.scores.data = Marshal.AllocHGlobal(size); + Marshal.Copy(scores, 0, fd_detection_result.scores.data, scores.Length); + + // copy label_ids + fd_detection_result.label_ids.size = (uint)detection_result.label_ids.Count; + int[] label_ids = new int[fd_detection_result.label_ids.size]; + // Copy data from Link to Array + detection_result.label_ids.CopyTo(label_ids); + // Copy data to unmanaged memory + size = Marshal.SizeOf(label_ids[0]) * label_ids.Length; + fd_detection_result.label_ids.data = Marshal.AllocHGlobal(size); + Marshal.Copy(label_ids, 0, fd_detection_result.label_ids.data, + label_ids.Length); + + // copy masks + fd_detection_result.masks.size = detection_result.masks.Count; + FD_Mask[] masks = new FD_Mask[fd_detection_result.masks.size]; + // copy each mask + for (int i = 0; i < (int)fd_detection_result.masks.size; i++) { + // copy data in mask + masks[i].data.size = (uint)detection_result.masks[i].data.Count; + byte[] masks_data_i = new byte[masks[i].data.size]; + detection_result.masks[i].data.CopyTo(masks_data_i); + size = Marshal.SizeOf(masks_data_i[0]) * masks_data_i.Length; + masks[i].data.data = Marshal.AllocHGlobal(size); + Marshal.Copy(masks_data_i, 0, masks[i].data.data, masks_data_i.Length); + // copy shape in mask + 
masks[i].shape.size = (uint)detection_result.masks[i].shape.Count; + long[] masks_shape_i = new long[masks[i].shape.size]; + detection_result.masks[i].shape.CopyTo(masks_shape_i); + size = Marshal.SizeOf(masks_shape_i[0]) * masks_shape_i.Length; + masks[i].shape.data = Marshal.AllocHGlobal(size); + Marshal.Copy(masks_shape_i, 0, masks[i].shape.data, masks_shape_i.Length); + // copy type + masks[i].type = (FD_ResultType)detection_result.masks[i].type; + } + if (fd_detection_result.masks.size != 0) { + size = Marshal.SizeOf(masks[0]) * masks.Length; + fd_detection_result.masks.data = Marshal.AllocHGlobal(size); + for (int i = 0; i < masks.Length; i++) { + Marshal.StructureToPtr(masks[i], + fd_detection_result.masks.data + + i * Marshal.SizeOf(masks[0]), + true); + } + } + + fd_detection_result.contain_masks = detection_result.contain_masks; + fd_detection_result.type = (FD_ResultType)detection_result.type; + return fd_detection_result; + } + + public static DetectionResult + ConvertCResultToDetectionResult(FD_DetectionResult fd_detection_result) { + DetectionResult detection_result = new DetectionResult(); + + // copy boxes + detection_result.boxes = new List(); + FD_OneDimArraySize[] boxes = + new FD_OneDimArraySize[fd_detection_result.boxes.size]; + Console.WriteLine(fd_detection_result.boxes.size); + for (int i = 0; i < (int)fd_detection_result.boxes.size; i++) { + boxes[i] = (FD_OneDimArraySize)Marshal.PtrToStructure( + fd_detection_result.boxes.data + i * Marshal.SizeOf(boxes[0]), + typeof(FD_OneDimArraySize)); + float[] box_i = new float[boxes[i].size]; + Marshal.Copy(boxes[i].data, box_i, 0, box_i.Length); + detection_result.boxes.Add(box_i); + } + + // copy scores + float[] scores = new float[fd_detection_result.scores.size]; + Marshal.Copy(fd_detection_result.scores.data, scores, 0, scores.Length); + detection_result.scores = new List(scores); + + // copy label_ids + int[] label_ids = new int[fd_detection_result.label_ids.size]; + Marshal.Copy(fd_detection_result.label_ids.data, label_ids, 0, + label_ids.Length); + detection_result.label_ids = new List(label_ids); + + // copy masks + detection_result.masks = new List(); + FD_Mask[] fd_masks = new FD_Mask[fd_detection_result.masks.size]; + for (int i = 0; i < (int)fd_detection_result.masks.size; i++) { + fd_masks[i] = (FD_Mask)Marshal.PtrToStructure( + fd_detection_result.masks.data + i * Marshal.SizeOf(fd_masks[0]), + typeof(FD_Mask)); + Mask mask_i = new Mask(); + byte[] mask_i_data = new byte[fd_masks[i].data.size]; + Marshal.Copy(fd_masks[i].data.data, mask_i_data, 0, mask_i_data.Length); + long[] mask_i_shape = new long[fd_masks[i].shape.size]; + Marshal.Copy(fd_masks[i].shape.data, mask_i_shape, 0, + mask_i_shape.Length); + mask_i.type = (ResultType)fd_masks[i].type; + detection_result.masks.Add(mask_i); + } + detection_result.contain_masks = fd_detection_result.contain_masks; + detection_result.type = (ResultType)fd_detection_result.type; + return detection_result; + } +} + +} + +} diff --git a/csharp/fastdeploy/vision/visualize.cs b/csharp/fastdeploy/vision/visualize.cs new file mode 100644 index 000000000..6ed5f168a --- /dev/null +++ b/csharp/fastdeploy/vision/visualize.cs @@ -0,0 +1,45 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.IO; +using System.Runtime.InteropServices; +using System.Collections.Generic; +using OpenCvSharp; +using fastdeploy.types_internal_c; + +namespace fastdeploy { +namespace vision { + +public class Visualize { + + public static Mat VisDetection(Mat im, DetectionResult detection_result, + float score_threshold = 0.0f, + int line_size = 1, float font_size = 0.5f) { + FD_DetectionResult fd_detection_result = + ConvertResult.ConvertDetectionResultToCResult(detection_result); + IntPtr result_ptr = + FD_C_VisDetection(im.CvPtr, ref fd_detection_result, score_threshold, + line_size, font_size); + return new Mat(result_ptr); + } + + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_VisDetection")] + private static extern IntPtr + FD_C_VisDetection(IntPtr im, ref FD_DetectionResult fd_detection_result, + float score_threshold, int line_size, float font_size); +} + +} +} \ No newline at end of file diff --git a/examples/vision/detection/paddledetection/csharp/CMakeLists.txt b/examples/vision/detection/paddledetection/csharp/CMakeLists.txt new file mode 100644 index 000000000..e1245d539 --- /dev/null +++ b/examples/vision/detection/paddledetection/csharp/CMakeLists.txt @@ -0,0 +1,23 @@ +PROJECT(infer_demo CSharp) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +# Set the C# language version (defaults to 3.0 if not set). +set(CMAKE_CSharp_FLAGS "/langversion:10") +set(CMAKE_DOTNET_TARGET_FRAMEWORK "net6.0") +set(CMAKE_DOTNET_SDK "Microsoft.NET.Sdk") + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeployCSharp.cmake) + + +add_executable(infer_ppyoloe_demo ${PROJECT_SOURCE_DIR}/infer_ppyoloe.cs) + +set_property(TARGET infer_ppyoloe_demo PROPERTY VS_DOTNET_REFERENCES + ${FASTDEPLOY_DOTNET_REFERENCES} +) + +set_property(TARGET infer_ppyoloe_demo + PROPERTY VS_PACKAGE_REFERENCES ${FASTDEPLOY_PACKAGE_REFERENCES} +) diff --git a/examples/vision/detection/paddledetection/csharp/README.md b/examples/vision/detection/paddledetection/csharp/README.md new file mode 100644 index 000000000..97bdbe0d3 --- /dev/null +++ b/examples/vision/detection/paddledetection/csharp/README.md @@ -0,0 +1,97 @@ +English | [简体中文](README_CN.md) +# PaddleDetection C# Deployment Example + +This directory provides examples that `infer_xxx.cs` to fastly finish the deployment of PaddleDetection models, including PPYOLOE on CPU/GPU. + +Before deployment, two steps require confirmation + +- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) +- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) + +Please follow below instructions to compile and test in Windows. FastDeploy version 1.0.4 or above (x.x.x>=1.0.4) is required to support this model. + +## 1. 
Download C# package management tool nuget client +> https://dist.nuget.org/win-x86-commandline/v6.4.0/nuget.exe + +Add the nuget program to the system variable **PATH** + +## 2. Download model and image for test +> https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz # (unzip it after download) +> https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg + +## 3. Compile example code + +Open the `x64 Native Tools Command Prompt for VS 2019` command tool on Windows, cd to the ppyoloe demo path and execute the following commands + +```shell +cd D:\Download\fastdeploy-win-x64-gpu-x.x.x\examples\vision\detection\paddledetection\csharp + +mkdir build && cd build +cmake .. -G "Visual Studio 16 2019" -A x64 -DFASTDEPLOY_INSTALL_DIR=D:\Download\fastdeploy-win-x64-gpu-x.x.x -DCUDA_DIRECTORY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2" + +nuget restore +msbuild infer_demo.sln /m:4 /p:Configuration=Release /p:Platform=x64 +``` + +For more information about how to use the FastDeploy SDK to compile a project with Visual Studio 2019, please refer to +- [Using the FastDeploy C++ SDK on Windows Platform](../../../../../docs/en/faq/use_sdk_on_windows.md) + +## 4. Execute compiled program +fastdeploy.dll and related dynamic libraries are required by the program. FastDeploy provides a script to copy all the required DLLs to your program path. + +```shell +cd D:\Download\fastdeploy-win-x64-gpu-x.x.x + +fastdeploy_init.bat install %cd% D:\Download\fastdeploy-win-x64-gpu-x.x.x\examples\vision\detection\paddledetection\csharp\build\Release +``` +Then you can run your program and test the model with an image + +```shell +cd Release +infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 0 # CPU +infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 1 # GPU +``` + +## PaddleDetection C# Interface + +### Model Class + +```c# +fastdeploy.vision.detection.PPYOLOE( + string model_file, + string params_file, + string config_file, + fastdeploy.RuntimeOption runtime_option = null, + fastdeploy.ModelFormat model_format = ModelFormat.PADDLE) +``` + +> PaddleDetection PPYOLOE initialization. + +> **Params** + +>> * **model_file**(str): Model file path +>> * **params_file**(str): Parameter file path +>> * **config_file**(str): Configuration file path, which is the deployment yaml file exported by PaddleDetection +>> * **runtime_option**(RuntimeOption): Backend inference configuration. null by default, which is the default configuration +>> * **model_format**(ModelFormat): Model format. Paddle format by default + +#### Predict Function + +```c# +fastdeploy.DetectionResult Predict(OpenCvSharp.Mat im) +``` + +> Model prediction interface. Input an image and output the detection result directly. +> +> **Params** +> +>> * **im**(Mat): Input image in HWC layout and BGR format +> +> **Return** +> +>> * **result**(DetectionResult): Detection result, including the detection box and confidence of each box.
Refer to [Vision Model Prediction Result](../../../../../docs/api/vision_results/) for DetectionResult + +- [Model Description](../../) +- [Python Deployment](../python) +- [Vision Model prediction results](../../../../../docs/api/vision_results/) +- [How to switch the model inference backend engine](../../../../../docs/en/faq/how_to_change_backend.md) diff --git a/examples/vision/detection/paddledetection/csharp/README_CN.md b/examples/vision/detection/paddledetection/csharp/README_CN.md new file mode 100644 index 000000000..36b300448 --- /dev/null +++ b/examples/vision/detection/paddledetection/csharp/README_CN.md @@ -0,0 +1,99 @@ +[English](README.md) | 简体中文 +# PaddleDetection C#部署示例 + +本目录下提供`infer_xxx.cs`来调用C# API快速完成PaddleDetection模型PPYOLOE在CPU/GPU上部署的示例。 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +在Windows下执行如下操作完成编译测试,支持此模型需保证FastDeploy版本1.0.4以上(x.x.x>=1.0.4) + +## 1. 下载C#包管理程序nuget客户端 +> https://dist.nuget.org/win-x86-commandline/v6.4.0/nuget.exe + +下载完成后将该程序添加到环境变量**PATH**中 + +## 2. 下载模型文件和测试图片 +> https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz # (下载后解压缩) +> https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg + +## 3. 编译示例代码 + +本文档编译的示例代码可在解压的库中找到,编译工具依赖VS 2019的安装,**Windows打开x64 Native Tools Command Prompt for VS 2019命令工具**,通过如下命令开始编译 + +```shell +cd D:\Download\fastdeploy-win-x64-gpu-x.x.x\examples\vision\detection\paddledetection\csharp + +mkdir build && cd build +cmake .. -G "Visual Studio 16 2019" -A x64 -DFASTDEPLOY_INSTALL_DIR=D:\Download\fastdeploy-win-x64-gpu-x.x.x -DCUDA_DIRECTORY="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v11.2" + +nuget restore +msbuild infer_demo.sln /m:4 /p:Configuration=Release /p:Platform=x64 +``` + +关于使用Visual Studio 2019创建sln工程,或者CMake工程等方式编译的更详细信息,可参考如下文档 +- [在 Windows 使用 FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) +- [FastDeploy C++库在Windows上的多种使用方式](../../../../../docs/cn/faq/use_sdk_on_windows_build.md) + +## 4. 
运行可执行程序 + +注意Windows上运行时,需要将FastDeploy依赖的库拷贝至可执行程序所在目录, 或者配置环境变量。FastDeploy提供了工具帮助我们快速将所有依赖库拷贝至可执行程序所在目录,通过如下命令将所有依赖的dll文件拷贝至可执行程序所在的目录 +```shell +cd D:\Download\fastdeploy-win-x64-gpu-x.x.x + +fastdeploy_init.bat install %cd% D:\Download\fastdeploy-win-x64-gpu-x.x.x\examples\vision\detection\paddledetection\csharp\build\Release +``` + +将dll拷贝到当前路径后,准备好模型和图片,使用如下命令运行可执行程序即可 +```shell +cd Release +infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 0 # CPU +infer_ppyoloe_demo.exe ppyoloe_crn_l_300e_coco 000000014439.jpg 1 # GPU +``` + +## PaddleDetection C#接口 + +### 模型 + +```c# +fastdeploy.vision.detection.PPYOLOE( + string model_file, + string params_file, + string config_file + fastdeploy.RuntimeOption runtime_option = null, + fastdeploy.ModelFormat model_format = ModelFormat.PADDLE) +``` + +> PaddleDetection PPYOLOE模型加载和初始化。 + +> **参数** + +>> * **model_file**(str): 模型文件路径 +>> * **params_file**(str): 参数文件路径 +>> * **config_file**(str): 配置文件路径,即PaddleDetection导出的部署yaml文件 +>> * **runtime_option**(RuntimeOption): 后端推理配置,默认为null,即采用默认配置 +>> * **model_format**(ModelFormat): 模型格式,默认为PADDLE格式 + +#### Predict函数 + +```c# +fastdeploy.DetectionResult Predict(OpenCvSharp.Mat im) +``` + +> 模型预测接口,输入图像直接输出检测结果。 +> +> **参数** +> +>> * **im**(Mat): 输入图像,注意需为HWC,BGR格式 +> +> **返回值** +> +>> * **result**(DetectionResult): 检测结果,包括检测框,各个框的置信度, DetectionResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/) + + +- [模型介绍](../../) +- [Python部署](../python) +- [视觉模型预测结果](../../../../../docs/api/vision_results/) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/detection/paddledetection/csharp/infer_ppyoloe.cs b/examples/vision/detection/paddledetection/csharp/infer_ppyoloe.cs new file mode 100644 index 000000000..c64e4c6af --- /dev/null +++ b/examples/vision/detection/paddledetection/csharp/infer_ppyoloe.cs @@ -0,0 +1,57 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +using System; +using System.IO; +using System.Runtime.InteropServices; +using OpenCvSharp; +using fastdeploy; + +namespace Test +{ + public class TestPPYOLOE + { + public static void Main(string[] args) + { + if (args.Length < 3) { + Console.WriteLine( + "Usage: infer_demo path/to/model_dir path/to/image run_option, " + + "e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0" + ); + Console.WriteLine( "The data type of run_option is int, 0: run with cpu; 1: run with gpu"); + return; + } + string model_dir = args[0]; + string image_path = args[1]; + string model_file = model_dir + "\\" + "model.pdmodel"; + string params_file = model_dir + "\\" + "model.pdiparams"; + string config_file = model_dir + "\\" + "infer_cfg.yml"; + RuntimeOption runtimeoption = new RuntimeOption(); + int device_option = Int32.Parse(args[2]); + if(device_option==0){ + runtimeoption.UseCpu(); + }else{ + runtimeoption.UseGpu(); + } + vision.detection.PPYOLOE model = new vision.detection.PPYOLOE(model_file, params_file, config_file, runtimeoption, ModelFormat.PADDLE); + Mat image = Cv2.ImRead(image_path); + vision.DetectionResult res = model.Predict(image); + Mat res_img = vision.Visualize.VisDetection(image, res, 0, 1, 0.5f); + Cv2.ImShow("result.png", res_img); + Cv2.WaitKey(0); + + } + + } +} \ No newline at end of file From fb1d95c7c91f4131e87666fe5c129a9f4233b95d Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 15 Feb 2023 07:11:41 +0000 Subject: [PATCH 20/41] Update docs --- docs/api/vision_results/segmentation_result.md | 4 ++-- .../api/vision_results/segmentation_result_CN.md | 4 ++-- docs/api_docs/cpp/vision_results_cn.md | 2 +- docs/api_docs/cpp/vision_results_en.md | 2 +- .../paddleseg/amlogic/a311d/README.md | 13 ++++++++++++- .../segmentation/paddleseg/kunlun/README.md | 12 +++++++++++- .../paddleseg/rockchip/rv1126/README.md | 16 ++++++++++++++-- .../serving/{README_CN.md => README.md} | 4 ++-- .../serving/fastdeploy_serving/README.md | 6 +++++- .../serving/fastdeploy_serving/README_CN.md | 3 +++ .../paddleseg/serving/simple_serving/README.md | 3 ++- .../serving/simple_serving/README_CN.md | 2 ++ .../segmentation/paddleseg/sophgo/README.md | 5 +++++ 13 files changed, 62 insertions(+), 14 deletions(-) rename examples/vision/segmentation/paddleseg/serving/{README_CN.md => README.md} (93%) diff --git a/docs/api/vision_results/segmentation_result.md b/docs/api/vision_results/segmentation_result.md index 426d38d1e..0852254ea 100644 --- a/docs/api/vision_results/segmentation_result.md +++ b/docs/api/vision_results/segmentation_result.md @@ -19,7 +19,7 @@ struct SegmentationResult { ``` - **label_map**: Member variable which indicates the segmentation category of each pixel in a single image. `label_map.size()` indicates the number of pixel points of a image. -- **score_map**: Member variable which indicates the predicted segmentation category probability value (specified as `--output_op argmax` when export) corresponding to label_map, or the probability value normalized by softmax (specified as `--output_op softmax` when export, or as `--output_op when exporting the model). none` when export while setting the [class member attribute](../../../examples/vision/segmentation/paddleseg/cpp/) as `apply_softmax=True` during model initialization). 
+- **score_map**: Member variable which indicates the predicted segmentation category probability value corresponding to the label_map one-to-one, the member variable is not empty only when `--output_op none` is specified when exporting the PaddleSeg model, otherwise the member variable is empty. - **shape**: Member variable which indicates the shape of the output image as H\*W. - **Clear()**: Member function used to clear the results stored in the structure. - **Str()**: Member function used to output the information in the structure as string (for Debug). @@ -29,5 +29,5 @@ struct SegmentationResult { `fastdeploy.vision.SegmentationResult` - **label_map**(list of int): Member variable which indicates the segmentation category of each pixel in a single image. -- **score_map**(list of float): Member variable which indicates the predicted segmentation category probability value (specified as `--output_op argmax` when export) corresponding to label_map, or the probability value normalized by softmax (specified as `--output_op softmax` when export, or as `--output_op when exporting the model). none` when export while setting the [class member attribute](../../../examples/vision/segmentation/paddleseg/cpp/) as `apply_softmax=True` during model initialization). +- **score_map**(list of float): Member variable which indicates the predicted segmentation category probability value corresponding to the label_map one-to-one, the member variable is not empty only when `--output_op none` is specified when exporting the PaddleSeg model, otherwise the member variable is empty. - **shape**(list of int): Member variable which indicates the shape of the output image as H\*W. diff --git a/docs/api/vision_results/segmentation_result_CN.md b/docs/api/vision_results/segmentation_result_CN.md index 0b3e22ee1..42b6eaf73 100644 --- a/docs/api/vision_results/segmentation_result_CN.md +++ b/docs/api/vision_results/segmentation_result_CN.md @@ -20,7 +20,7 @@ struct SegmentationResult { ``` - **label_map**: 成员变量,表示单张图片每个像素点的分割类别,`label_map.size()`表示图片像素点的个数 -- **score_map**: 成员变量,与label_map一一对应的所预测的分割类别概率值(当导出模型时指定`--output_op argmax`)或者经过softmax归一化化后的概率值(当导出模型时指定`--output_op softmax`或者导出模型时指定`--output_op none`同时模型初始化的时候设置模型[类成员属性](../../../examples/vision/segmentation/paddleseg/cpp/)`apply_softmax=True`) +- **score_map**: 成员变量,与label_map一一对应的所预测的分割类别概率值,只有导出PaddleSeg模型时指定`--output_op none`时,该成员变量才不为空,否则该成员变量为空 - **shape**: 成员变量,表示输出图片的shape,为H\*W - **Clear()**: 成员函数,用于清除结构体中存储的结果 - **Free()**: 成员函数,用于清除结构体中存储的结果并释放内存 @@ -31,5 +31,5 @@ struct SegmentationResult { `fastdeploy.vision.SegmentationResult` - **label_map**(list of int): 成员变量,表示单张图片每个像素点的分割类别 -- **score_map**(list of float): 成员变量,与label_map一一对应的所预测的分割类别概率值(当导出模型时指定`--output_op argmax`)或者经过softmax归一化化后的概率值(当导出模型时指定`--output_op softmax`或者导出模型时指定`--output_op none`同时模型初始化的时候设置模型[类成员属性](../../../examples/vision/segmentation/paddleseg/python/)`apply_softmax=true`) +- **score_map**(list of float): 成员变量,与label_map一一对应的所预测的分割类别概率值,只有导出PaddleSeg模型时指定`--output_op none`时,该成员变量才不为空,否则该成员变量为空 - **shape**(list of int): 成员变量,表示输出图片的shape,为H\*W diff --git a/docs/api_docs/cpp/vision_results_cn.md b/docs/api_docs/cpp/vision_results_cn.md index 669343eb4..73b3b4e6d 100644 --- a/docs/api_docs/cpp/vision_results_cn.md +++ b/docs/api_docs/cpp/vision_results_cn.md @@ -49,7 +49,7 @@ struct SegmentationResult { ``` - **label_map**: 成员变量,表示单张图片每个像素点的分割类别,`label_map.size()`表示图片像素点的个数 -- **score_map**: 成员变量,与label_map一一对应的所预测的分割类别概率值(当导出模型时指定`--output_op 
argmax`)或者经过softmax归一化化后的概率值(当导出模型时指定`--output_op softmax`或者导出模型时指定`--output_op none`同时模型初始化的时候设置模型[类成员属性](../../../examples/vision/segmentation/paddleseg/cpp/)`apply_softmax=True`) +- **score_map**: 成员变量,与label_map一一对应的所预测的分割类别概率值,只有导出PaddleSeg模型时指定`--output_op none`时,该成员变量才不为空,否则该成员变量为空 - **shape**: 成员变量,表示输出图片的shape,为H\*W - **Clear()**: 成员函数,用于清除结构体中存储的结果 - **Free()**: 成员函数,用于清除结构体中存储的结果并释放内存 diff --git a/docs/api_docs/cpp/vision_results_en.md b/docs/api_docs/cpp/vision_results_en.md index 356f66b8e..7feb3793c 100644 --- a/docs/api_docs/cpp/vision_results_en.md +++ b/docs/api_docs/cpp/vision_results_en.md @@ -49,7 +49,7 @@ struct SegmentationResult { ``` - **label_map**: Member variable which indicates the segmentation category of each pixel in a single image. `label_map.size()` indicates the number of pixel points of a image. -- **score_map**: Member variable which indicates the predicted segmentation category probability value (specified as `--output_op argmax` when export) corresponding to label_map, or the probability value normalized by softmax (specified as `--output_op softmax` when export, or as `--output_op when exporting the model). none` when export while setting the [class member attribute](../../../examples/vision/segmentation/paddleseg/cpp/) as `apply_softmax=True` during model initialization). +- **score_map**: Member variable which indicates the predicted segmentation category probability value corresponding to the label_map one-to-one, the member variable is not empty only when `--output_op none` is specified when exporting the PaddleSeg model, otherwise the member variable is empty. - **shape**: Member variable which indicates the shape of the output image as H\*W. - **Clear()**: Member function used to clear the results stored in the structure. - **Str()**: Member function used to output the information in the structure as string (for Debug). 
diff --git a/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md b/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md index c9a04fd41..80a85d8da 100644 --- a/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md +++ b/examples/vision/segmentation/paddleseg/amlogic/a311d/README.md @@ -1,8 +1,19 @@ [English](README.md) | 简体中文 -# PaddleSeg在晶晨A311D上通过FastDeploy部署模型 +# PaddleSeg在晶晨NPU上通过FastDeploy部署模型 + +## PaddleSeg支持部署的晶晨芯片型号 +支持如下芯片的部署 +- Amlogic A311D +- Amlogic C308X +- Amlogic S905D3 + +本示例基于晶晨A311D来介绍如何使用FastDeploy部署PaddleSeg模型 + 晶晨A311D是一款先进的AI应用处理器。PaddleSeg支持通过FastDeploy在A311D上基于Paddle-Lite部署相关Segmentation模型 +>> **注意**:需要注意的是,芯原(verisilicon)作为 IP 设计厂商,本身并不提供实体SoC产品,而是授权其 IP 给芯片厂商,如:晶晨(Amlogic),瑞芯微(Rockchip)等。因此本文是适用于被芯原授权了 NPU IP 的芯片产品。只要芯片产品没有大幅修改芯原的底层库,则该芯片就可以使用本文档作为 Paddle Lite 推理部署的参考和教程。在本文中,晶晨 SoC 中的 NPU 和 瑞芯微 SoC 中的 NPU 统称为芯原 NPU。 + ## 晶晨A311D支持的PaddleSeg模型 - [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg) diff --git a/examples/vision/segmentation/paddleseg/kunlun/README.md b/examples/vision/segmentation/paddleseg/kunlun/README.md index cdb727988..305ce688c 100644 --- a/examples/vision/segmentation/paddleseg/kunlun/README.md +++ b/examples/vision/segmentation/paddleseg/kunlun/README.md @@ -1,6 +1,16 @@ [English](README.md) | 简体中文 -# PaddleSeg模型高性能全场景部署方案-FastDeploy +# PaddleSeg利用FastDeploy在昆仑芯上部署模型 + +## PaddleSeg支持部署的昆仑芯的芯片型号 +支持如下芯片的部署 +- 昆仑 818-100(推理芯片) +- 昆仑 818-300(训练芯片) + +支持如下芯片的设备 +- K100/K200 昆仑 AI 加速卡 +- R200 昆仑芯 AI 加速卡 + PaddleSeg支持利用FastDeploy在昆仑芯片上部署Segmentation模型 diff --git a/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md index 12b9a0d05..e03960f09 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md +++ b/examples/vision/segmentation/paddleseg/rockchip/rv1126/README.md @@ -1,6 +1,18 @@ [English](README.md) | 简体中文 -# PaddleSeg在瑞芯微 RV1126上通过FastDeploy部署模型 -瑞芯微 RV1126 是一款编解码芯片,专门面相人工智能的机器视觉领域。PaddleSeg支持通过FastDeploy在RV1126上基于Paddle-Lite部署相关Segmentation模型 +# PaddleSeg在瑞芯微NPU上通过FastDeploy部署模型 + +## PaddleSeg支持部署的瑞芯微的芯片型号 +支持如下芯片的部署 +- Rockchip RV1109 +- Rockchip RV1126 +- Rockchip RK1808 + +>> **注意**:需要注意的是,芯原(verisilicon)作为 IP 设计厂商,本身并不提供实体SoC产品,而是授权其 IP 给芯片厂商,如:晶晨(Amlogic),瑞芯微(Rockchip)等。因此本文是适用于被芯原授权了 NPU IP 的芯片产品。只要芯片产品没有大幅修改芯原的底层库,则该芯片就可以使用本文档作为 Paddle Lite 推理部署的参考和教程。在本文中,晶晨 SoC 中的 NPU 和 瑞芯微 SoC 中的 NPU 统称为芯原 NPU。 +瑞芯微 RV1126 是一款编解码芯片,专门面向人工智能的机器视觉领域。 + +本示例基于RV1126来介绍如何使用FastDeploy部署PaddleSeg模型 + +PaddleSeg支持通过FastDeploy在RV1126上基于Paddle-Lite部署相关Segmentation模型 ## 瑞芯微 RV1126支持的PaddleSeg模型 diff --git a/examples/vision/segmentation/paddleseg/serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/README.md similarity index 93% rename from examples/vision/segmentation/paddleseg/serving/README_CN.md rename to examples/vision/segmentation/paddleseg/serving/README.md index 803465941..00074183d 100644 --- a/examples/vision/segmentation/paddleseg/serving/README_CN.md +++ b/examples/vision/segmentation/paddleseg/serving/README.md @@ -2,8 +2,8 @@ # PaddleSeg 使用 FastDeploy 服务化部署 Segmentation 模型 ## FastDeploy 服务化部署介绍 在线推理作为企业或个人线上部署模型的最后一环,是工业界必不可少的环节,其中最重要的就是服务化推理框架。FastDeploy 目前提供两种服务化部署方式:simple_serving和fastdeploy_serving -- simple_serving基于Flask框架具有简单高效的特点,可以快速验证线上部署模型的可行性。 -- fastdeploy_serving基于Triton Inference Server框架,是一套完备且性能卓越的服务化部署框架,可用于实际生产。 +- simple_serving:适用于只需要通过http等调用AI推理任务,没有高并发需求的场景。simple_serving基于Flask框架具有简单高效的特点,可以快速验证线上部署模型的可行性 +- fastdeploy_serving:适用于高并发、高吞吐量请求的场景。基于Triton Inference
Server框架,是一套可用于实际生产的完备且性能卓越的服务化部署框架 + ## 模型版本说明 diff --git a/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README.md b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README.md index c5b6dd41f..120b3994a 100644 --- a/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README.md +++ b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README.md @@ -1,5 +1,9 @@ English | [简体中文](README_CN.md) -# PaddleSegmentation Serving Deployment Demo +# PaddleSeg Serving Deployment Demo + +The PaddleSeg serving deployment demo is built with FastDeploy Serving. FastDeploy Serving is a service-oriented deployment framework built on the Triton Inference Server and designed for high-concurrency, high-throughput requests; it is a complete, high-performance serving framework that can be used in actual production. If you do not have high-concurrency, high-throughput requirements and just want to quickly test the feasibility of deploying the model online, please refer to [simple_serving](../simple_serving/) + +## Environment Before serving deployment, it is necessary to confirm the hardware and software environment requirements of the service image and the image pull command, please refer to [FastDeploy service deployment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/README.md) diff --git a/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md index ae346cb5b..a07a773e5 100644 --- a/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md +++ b/examples/vision/segmentation/paddleseg/serving/fastdeploy_serving/README_CN.md @@ -1,6 +1,9 @@ [English](README.md) | 简体中文 # PaddleSeg 服务化部署示例 +PaddleSeg 服务化部署示例是利用FastDeploy Serving搭建的服务化部署示例。FastDeploy Serving是基于Triton Inference Server框架封装的适用于高并发、高吞吐量请求的服务化部署框架,是一套可用于实际生产的完备且性能卓越的服务化部署框架。如没有高并发,高吞吐场景的需求,只想快速检验模型线上部署的可行性,请参考[simple_serving](../simple_serving/) + +## 部署环境准备 在服务化部署前,需确认服务化镜像的软硬件环境要求和镜像拉取命令,请参考[FastDeploy服务化部署](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/README_CN.md) diff --git a/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md b/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md index 686164ad7..ea8223ecc 100644 --- a/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md +++ b/examples/vision/segmentation/paddleseg/serving/simple_serving/README.md @@ -1,7 +1,8 @@ English | [简体中文](README_CN.md) -# PaddleSegmentation Python Simple Serving Demo +# PaddleSeg Python Simple Serving Demo +PaddleSeg Python Simple Serving is a serving deployment example built by FastDeploy on the Flask framework to quickly verify the feasibility of deploying a model online. It serves AI inference tasks over HTTP requests and is suitable for simple scenarios without concurrent inference requests.
For high concurrency and high throughput scenarios, please refer to [fastdeploy_serving](../fastdeploy_serving/) ## Environment diff --git a/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md index db06103ed..afc1325dc 100644 --- a/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md +++ b/examples/vision/segmentation/paddleseg/serving/simple_serving/README_CN.md @@ -2,6 +2,8 @@ # PaddleSeg Python轻量服务化部署示例 +PaddleSeg Python轻量服务化部署是FastDeploy基于Flask框架搭建的可快速验证线上模型部署可行性的服务化部署示例,基于http请求完成AI推理任务,适用于无并发推理的简单场景,如有高并发,高吞吐场景的需求请参考[fastdeploy_serving](../fastdeploy_serving/) + ## 部署环境准备 在部署前,需确认软硬件环境,同时下载预编译python wheel 包,参考文档[FastDeploy预编译库安装](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装) diff --git a/examples/vision/segmentation/paddleseg/sophgo/README.md b/examples/vision/segmentation/paddleseg/sophgo/README.md index 366656a75..67f636486 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/README.md +++ b/examples/vision/segmentation/paddleseg/sophgo/README.md @@ -1,5 +1,10 @@ [English](README.md) | 简体中文 # PaddleSeg在算能(Sophgo)硬件上通过FastDeploy部署模型 + +## PaddleSeg支持部署的Sophgo的芯片型号 +支持如下芯片的部署 +- Sophgo 1684X + PaddleSeg支持通过FastDeploy在算能TPU上部署相关Segmentation模型 ## 算能硬件支持的PaddleSeg模型 From 3835d066540a1a40886681285035d8ebeec589ab Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 15 Feb 2023 07:33:45 +0000 Subject: [PATCH 21/41] Save png format image in matting example to support transparent background --- examples/vision/matting/modnet/cpp/infer.cc | 6 +++--- examples/vision/matting/modnet/python/infer.py | 2 +- examples/vision/matting/rvm/cpp/infer.cc | 6 +++--- examples/vision/matting/rvm/python/infer.py | 2 +- examples/vision/segmentation/ppmatting/cpu-gpu/cpp/infer.cc | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/vision/matting/modnet/cpp/infer.cc b/examples/vision/matting/modnet/cpp/infer.cc index aa280eddd..1fef35218 100644 --- a/examples/vision/matting/modnet/cpp/infer.cc +++ b/examples/vision/matting/modnet/cpp/infer.cc @@ -35,7 +35,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file, auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); + cv::imwrite("visualized_result_fg.png", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " "and ./visualized_result_fg.jpg" << std::endl; @@ -65,7 +65,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file, auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); + cv::imwrite("visualized_result_fg.png", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " "and ./visualized_result_fg.jpg" << std::endl; @@ -96,7 +96,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file, auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); + cv::imwrite("visualized_result_fg.png", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " "and ./visualized_result_fg.jpg" << std::endl; diff --git 
a/examples/vision/matting/modnet/python/infer.py b/examples/vision/matting/modnet/python/infer.py index 37c749010..0be874271 100644 --- a/examples/vision/matting/modnet/python/infer.py +++ b/examples/vision/matting/modnet/python/infer.py @@ -57,7 +57,7 @@ print(result) # 可视化结果 vis_im = fd.vision.vis_matting_alpha(im, result) vis_im_with_bg = fd.vision.swap_background(im, bg, result) -cv2.imwrite("visualized_result_fg.jpg", vis_im) +cv2.imwrite("visualized_result_fg.png", vis_im) cv2.imwrite("visualized_result_replaced_bg.jpg", vis_im_with_bg) print( "Visualized result save in ./visualized_result_replaced_bg.jpg and ./visualized_result_fg.jpg" diff --git a/examples/vision/matting/rvm/cpp/infer.cc b/examples/vision/matting/rvm/cpp/infer.cc index 9e2a2aa5d..4ca975216 100755 --- a/examples/vision/matting/rvm/cpp/infer.cc +++ b/examples/vision/matting/rvm/cpp/infer.cc @@ -41,7 +41,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file, auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im_bak, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); + cv::imwrite("visualized_result_fg.png", vis_im); std::cout << "Visualized result save in ./visualized_result.jpg " "and ./visualized_result_fg.jpg" << std::endl; @@ -69,7 +69,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file, auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im_bak, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); + cv::imwrite("visualized_result_fg.png", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " "and ./visualized_result_fg.jpg" << std::endl; @@ -103,7 +103,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file, auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im_bak, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); + cv::imwrite("visualized_result_fg.png", vis_im); std::cout << "Visualized result save in ./visualized_result.jpg " "and ./visualized_result_fg.jpg" << std::endl; diff --git a/examples/vision/matting/rvm/python/infer.py b/examples/vision/matting/rvm/python/infer.py index 0e9eb6b21..b2ade3c0b 100755 --- a/examples/vision/matting/rvm/python/infer.py +++ b/examples/vision/matting/rvm/python/infer.py @@ -105,7 +105,7 @@ if args.image is not None: # 可视化结果 vis_im = fd.vision.vis_matting(im, result) vis_im_with_bg = fd.vision.swap_background(im, bg, result) - cv2.imwrite("visualized_result_fg.jpg", vis_im) + cv2.imwrite("visualized_result_fg.png", vis_im) cv2.imwrite("visualized_result_replaced_bg.jpg", vis_im_with_bg) print( "Visualized result save in ./visualized_result_replaced_bg.jpg and ./visualized_result_fg.jpg" diff --git a/examples/vision/segmentation/ppmatting/cpu-gpu/cpp/infer.cc b/examples/vision/segmentation/ppmatting/cpu-gpu/cpp/infer.cc index 975f0e76b..9f1e69d5e 100644 --- a/examples/vision/segmentation/ppmatting/cpu-gpu/cpp/infer.cc +++ b/examples/vision/segmentation/ppmatting/cpu-gpu/cpp/infer.cc @@ -44,7 +44,7 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file, auto vis_im = fastdeploy::vision::VisMatting(im, res); auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); + cv::imwrite("visualized_result_fg.png", vis_im); std::cout << 
"Visualized result save in ./visualized_result_replaced_bg.jpg " "and ./visualized_result_fg.jpg" << std::endl; @@ -74,7 +74,7 @@ void KunlunXinInfer(const std::string& model_dir, const std::string& image_file, auto vis_im = fastdeploy::vision::VisMatting(im, res); auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); + cv::imwrite("visualized_result_fg.png", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " "and ./visualized_result_fg.jpg" << std::endl; @@ -143,7 +143,7 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file, auto vis_im = fastdeploy::vision::VisMatting(im, res); auto vis_im_with_bg = fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); - cv::imwrite("visualized_result_fg.jpg", vis_im); + cv::imwrite("visualized_result_fg.png", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " "and ./visualized_result_fg.jpg" << std::endl; From da94fc46cfd1f0d5d727aa26898a04b29de944c1 Mon Sep 17 00:00:00 2001 From: WJJ1995 Date: Wed, 15 Feb 2023 17:25:49 +0800 Subject: [PATCH 22/41] [Benchmark] Support PaddleClas cpp benchmark (#1324) * add GPL lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint * rm pmap and use mem api * rm pmap and use mem api * add mem api * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * deal with comments * fixed enable_paddle_to_trt * add log for paddle_trt * support ppcls benchmark * use new trt option api * update benchmark info * simplify benchmark.cc * simplify benchmark.cc * deal with comments --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com> --- benchmark/cpp/CMakeLists.txt | 3 +++ benchmark/cpp/benchmark_ppcls.cc | 36 +++++++++++++++++++++++++++++ benchmark/cpp/benchmark_ppyolov8.cc | 11 ++++----- benchmark/cpp/benchmark_yolov5.cc | 13 ++++------- benchmark/cpp/flags.h | 13 +++++++---- benchmark/cpp/option.h | 19 ++++++++++----- benchmark/python/benchmark_ppcls.py | 5 +++- benchmark/python/benchmark_ppdet.py | 3 ++- benchmark/python/benchmark_ppocr.py | 5 ++-- benchmark/python/benchmark_ppseg.py | 5 ++-- benchmark/python/benchmark_uie.py | 5 ++-- benchmark/python/benchmark_yolo.py | 3 ++- 12 files changed, 86 insertions(+), 35 deletions(-) create mode 100755 benchmark/cpp/benchmark_ppcls.cc mode change 100644 => 100755 benchmark/python/benchmark_ppocr.py mode change 100644 => 100755 benchmark/python/benchmark_uie.py diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt index c79e679c3..23e1ccf79 100755 --- a/benchmark/cpp/CMakeLists.txt +++ b/benchmark/cpp/CMakeLists.txt @@ -10,11 +10,14 @@ include_directories(${FASTDEPLOY_INCS}) 
add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc) add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc) +add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc) if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread) else() target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread) endif() diff --git a/benchmark/cpp/benchmark_ppcls.cc b/benchmark/cpp/benchmark_ppcls.cc new file mode 100755 index 000000000..a62fcf80f --- /dev/null +++ b/benchmark/cpp/benchmark_ppcls.cc @@ -0,0 +1,36 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "flags.h" +#include "macros.h" +#include "option.h" + +int main(int argc, char* argv[]) { + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; + } + auto im = cv::imread(FLAGS_image); + // Set max_batch_size 1 for best performance + option.trt_option.max_batch_size = 1; + auto model_file = FLAGS_model + sep + "inference.pdmodel"; + auto params_file = FLAGS_model + sep + "inference.pdiparams"; + auto config_file = FLAGS_model + sep + "inference_cls.yaml"; + auto model_ppcls = fastdeploy::vision::classification::PaddleClasModel( + model_file, params_file, config_file, option); + fastdeploy::vision::ClassifyResult res; + BENCHMARK_MODEL(model_ppcls, model_ppcls.Predict(im, &res)) + return 0; +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc index 5541696d5..b93121f54 100755 --- a/benchmark/cpp/benchmark_ppyolov8.cc +++ b/benchmark/cpp/benchmark_ppyolov8.cc @@ -12,20 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "macros.h" #include "flags.h" +#include "macros.h" #include "option.h" int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - auto im = cv::imread(FLAGS_image); // Initialization auto option = fastdeploy::RuntimeOption(); - if (!CreateRuntimeOption(&option)) { - PrintUsage(); - return false; + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; } - PrintBenchmarkInfo(); + auto im = cv::imread(FLAGS_image); auto model_file = FLAGS_model + sep + "model.pdmodel"; auto params_file = FLAGS_model + sep + "model.pdiparams"; auto config_file = FLAGS_model + sep + "infer_cfg.yml"; diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc index 6ab3c5990..3dc84c487 100755 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -12,20 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "macros.h" #include "flags.h" +#include "macros.h" #include "option.h" int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - auto im = cv::imread(FLAGS_image); // Initialization auto option = fastdeploy::RuntimeOption(); - if (!CreateRuntimeOption(&option)) { - PrintUsage(); - return false; + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; } - PrintBenchmarkInfo(); + auto im = cv::imread(FLAGS_image); auto model_yolov5 = fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); fastdeploy::vision::DetectionResult res; @@ -34,4 +31,4 @@ int main(int argc, char* argv[]) { cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; return 0; -} +} \ No newline at end of file diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h index fd20e685c..e4c2f8d19 100755 --- a/benchmark/cpp/flags.h +++ b/benchmark/cpp/flags.h @@ -15,11 +15,12 @@ #pragma once #include "gflags/gflags.h" +#include "fastdeploy/benchmark/utils.h" #ifdef WIN32 -const char sep = '\\'; +static const char sep = '\\'; #else -const char sep = '/'; +static const char sep = '/'; #endif DEFINE_string(model, "", "Directory of the inference model."); @@ -44,7 +45,7 @@ DEFINE_bool( collect_memory_info, false, "Whether to collect memory info"); DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms)."); -void PrintUsage() { +static void PrintUsage() { std::cout << "Usage: infer_demo --model model_path --image img_path --device " "[cpu|gpu|xpu] --backend " "[default|ort|paddle|ov|trt|paddle_trt|lite] " @@ -55,7 +56,7 @@ void PrintUsage() { std::cout << "Default value of use_fp16: false" << std::endl; } -void PrintBenchmarkInfo() { +static void PrintBenchmarkInfo() { // Get model name std::vector model_names; fastdeploy::benchmark::Split(FLAGS_model, model_names, sep); @@ -76,7 +77,9 @@ void PrintBenchmarkInfo() { ss << "device_id: " << FLAGS_device_id << std::endl; } ss << "backend: " << FLAGS_backend << std::endl; - ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl; + if (FLAGS_device == "cpu") { + ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl; + } ss << "use_fp16: " << FLAGS_use_fp16 << std::endl; ss << "collect_memory_info: " << FLAGS_collect_memory_info << std::endl; if (FLAGS_collect_memory_info) { diff --git a/benchmark/cpp/option.h b/benchmark/cpp/option.h index 9989255e5..ef865ed6e 100755 --- a/benchmark/cpp/option.h +++ b/benchmark/cpp/option.h @@ -16,7 +16,9 @@ #include "fastdeploy/vision.h" -static bool 
CreateRuntimeOption(fastdeploy::RuntimeOption* option) { +static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option, + int argc, char* argv[], bool remove_flags) { + google::ParseCommandLineFlags(&argc, &argv, remove_flags); if (FLAGS_profile_mode == "runtime") { option->EnableProfiling(FLAGS_include_h2d_d2h, FLAGS_repeat, FLAGS_warmup); } @@ -29,10 +31,11 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") { option->UseTrtBackend(); if (FLAGS_backend == "paddle_trt") { - option->EnablePaddleToTrt(); + option->UsePaddleInferBackend(); + option->paddle_infer_option.enable_trt = true; } if (FLAGS_use_fp16) { - option->EnableTrtFP16(); + option->trt_option.enable_fp16 = true; } } else if (FLAGS_backend == "default") { return true; @@ -40,6 +43,7 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { std::cout << "While inference with GPU, only support " "default/ort/paddle/trt/paddle_trt now, " << FLAGS_backend << " is not supported." << std::endl; + PrintUsage(); return false; } } else if (FLAGS_device == "cpu") { @@ -53,7 +57,7 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { } else if (FLAGS_backend == "lite") { option->UsePaddleLiteBackend(); if (FLAGS_use_fp16) { - option->EnableLiteFP16(); + option->paddle_lite_option.enable_fp16 = true; } } else if (FLAGS_backend == "default") { return true; @@ -61,6 +65,7 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { std::cout << "While inference with CPU, only support " "default/ort/ov/paddle/lite now, " << FLAGS_backend << " is not supported." << std::endl; + PrintUsage(); return false; } } else if (FLAGS_device == "xpu") { @@ -72,7 +77,7 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { } else if (FLAGS_backend == "lite") { option->UsePaddleLiteBackend(); if (FLAGS_use_fp16) { - option->EnableLiteFP16(); + option->paddle_lite_option.enable_fp16 = true; } } else if (FLAGS_backend == "default") { return true; @@ -80,13 +85,15 @@ static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { std::cout << "While inference with XPU, only support " "default/ort/paddle/lite now, " << FLAGS_backend << " is not supported." << std::endl; + PrintUsage(); return false; } } else { std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device << " is not supported." 
<< std::endl; + PrintUsage(); return false; } - + PrintBenchmarkInfo(); return true; } diff --git a/benchmark/python/benchmark_ppcls.py b/benchmark/python/benchmark_ppcls.py index 20a62c9fc..34477456b 100755 --- a/benchmark/python/benchmark_ppcls.py +++ b/benchmark/python/benchmark_ppcls.py @@ -102,7 +102,10 @@ def build_option(args): elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": - option.enable_paddle_to_trt() + option.use_paddle_infer_backend() + option.paddle_infer_option.enable_trt = True + # Set max_batch_size 1 for best performance + option.trt_option.max_batch_size = 1 if enable_trt_fp16: option.enable_trt_fp16() elif backend == "default": diff --git a/benchmark/python/benchmark_ppdet.py b/benchmark/python/benchmark_ppdet.py index c2b1da6b1..eca33df09 100755 --- a/benchmark/python/benchmark_ppdet.py +++ b/benchmark/python/benchmark_ppdet.py @@ -115,7 +115,8 @@ def build_option(args): elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": - option.enable_paddle_to_trt() + option.use_paddle_infer_backend() + option.paddle_infer_option.enable_trt = True if enable_trt_fp16: option.enable_trt_fp16() elif backend == "default": diff --git a/benchmark/python/benchmark_ppocr.py b/benchmark/python/benchmark_ppocr.py old mode 100644 new mode 100755 index 2de86be68..90f11de34 --- a/benchmark/python/benchmark_ppocr.py +++ b/benchmark/python/benchmark_ppocr.py @@ -92,8 +92,9 @@ def build_option(args): elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": - option.enable_paddle_trt_collect_shape() - option.enable_paddle_to_trt() + option.paddle_infer_option.collect_trt_shape = True + option.use_paddle_infer_backend() + option.paddle_infer_option.enable_trt = True if enable_trt_fp16: option.enable_trt_fp16() elif backend == "default": diff --git a/benchmark/python/benchmark_ppseg.py b/benchmark/python/benchmark_ppseg.py index 9408e7b64..4ff4a3808 100755 --- a/benchmark/python/benchmark_ppseg.py +++ b/benchmark/python/benchmark_ppseg.py @@ -96,8 +96,9 @@ def build_option(args): option.set_trt_input_shape("x", [1, 3, 192, 192], [1, 3, 192, 192], [1, 3, 192, 192]) if backend == "paddle_trt": - option.enable_paddle_trt_collect_shape() - option.enable_paddle_to_trt() + option.paddle_infer_option.collect_trt_shape = True + option.use_paddle_infer_backend() + option.paddle_infer_option.enable_trt = True if enable_trt_fp16: option.enable_trt_fp16() elif backend == "default": diff --git a/benchmark/python/benchmark_uie.py b/benchmark/python/benchmark_uie.py old mode 100644 new mode 100755 index 44c562d7e..e197ef16e --- a/benchmark/python/benchmark_uie.py +++ b/benchmark/python/benchmark_uie.py @@ -76,8 +76,9 @@ def build_option(args): else: option.use_trt_backend() if args.backend == 'paddle_trt': - option.enable_paddle_to_trt() - option.enable_paddle_trt_collect_shape() + option.paddle_infer_option.collect_trt_shape = True + option.use_paddle_infer_backend() + option.paddle_infer_option.enable_trt = True trt_file = os.path.join(args.model_dir, "infer.trt") option.set_trt_input_shape( 'input_ids', diff --git a/benchmark/python/benchmark_yolo.py b/benchmark/python/benchmark_yolo.py index e0ee0ce23..a07127200 100755 --- a/benchmark/python/benchmark_yolo.py +++ b/benchmark/python/benchmark_yolo.py @@ -85,7 +85,8 @@ def build_option(args): elif backend in ["trt", "paddle_trt"]: option.use_trt_backend() if backend == "paddle_trt": - option.enable_paddle_to_trt() + 
option.use_paddle_infer_backend() + option.paddle_infer_option.enable_trt = True if enable_trt_fp16: option.enable_trt_fp16() elif backend == "default": From 32af8707117e7a8d656531d86cd058ff3233e2e4 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com> Date: Wed, 15 Feb 2023 17:55:58 +0800 Subject: [PATCH 23/41] [Doc] Update RKYOLO Docs (#1330) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 更新docs * 修正docs错误 * 更新docs * 更新python example脚本和ppyoloe转换脚本 * 更新PaddleDetection文档 * 更新文档 * 更新文档 * 更新文档 * 更新文档 * 更新文档 * 更新RKYOLO系列模型文档 * 更新PaddleDetection python example --- .../paddledetection/rknpu2/README_CN.md | 15 ++- .../paddledetection/rknpu2/cpp/CMakeLists.txt | 3 + .../paddledetection/rknpu2/cpp/README_CN.md | 11 +-- .../rknpu2/cpp/infer_ppyoloe_demo.cc | 95 +++++++++++++++++++ .../paddledetection/rknpu2/python/infer.py | 4 +- examples/vision/detection/rkyolo/README_CN.md | 14 ++- 6 files changed, 128 insertions(+), 14 deletions(-) create mode 100644 examples/vision/detection/paddledetection/rknpu2/cpp/infer_ppyoloe_demo.cc diff --git a/examples/vision/detection/paddledetection/rknpu2/README_CN.md b/examples/vision/detection/paddledetection/rknpu2/README_CN.md index 6a932fe3a..781368ddc 100644 --- a/examples/vision/detection/paddledetection/rknpu2/README_CN.md +++ b/examples/vision/detection/paddledetection/rknpu2/README_CN.md @@ -50,7 +50,7 @@ paddle2onnx --model_dir picodet_s_416_coco_lcnet \ # 固定shape python -m paddle2onnx.optimize --input_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \ --output_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \ - --input_shape_dict "{'image':[1,3,416,416]}" + --input_shape_dict "{'image':[1,3,416,416], 'scale_factor':[1,2]}" ``` ### 编写yaml文件 @@ -73,11 +73,12 @@ std: ``` **修改outputs参数** + 由于Paddle2ONNX版本的不同,转换模型的输出节点名称也有所不同,请使用[Netron](https://netron.app)对模型进行可视化,并找到以下蓝色方框标记的NonMaxSuppression节点,红色方框的节点名称即为目标名称。 例如,使用Netron可视化后,得到以下图片: -![](https://user-images.githubusercontent.com/58363586/212599781-e1952da7-6eae-4951-8ca7-bab7e6940692.png) +![](https://ai-studio-static-online.cdn.bcebos.com/8bce6b904a6b479e8b30da9f7c719fad57517ffb2f234aeca3b8ace0761754d5) 找到蓝色方框标记的NonMaxSuppression节点,可以看到红色方框标记的两个节点名称为p2o.Div.79和p2o.Concat.9,因此需要修改outputs参数,修改后如下: @@ -96,6 +97,16 @@ python tools/rknpu2/export.py --config_path tools/rknpu2/config/picodet_s_416_co --target_platform rk3588 ``` +## RKNN模型列表 + +为了方便大家测试,我们提供picodet和ppyoloe两个模型,解压后即可使用: + +| 模型名称 | 下载地址 | +|-----------------------------|-----------------------------------------------------------------------------------| +| picodet_s_416_coco_lcnet | https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/picodet_s_416_coco_lcnet.zip | +| ppyoloe_plus_crn_s_80e_coco | https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/ppyoloe_plus_crn_s_80e_coco.zip | + + ## 其他链接 diff --git a/examples/vision/detection/paddledetection/rknpu2/cpp/CMakeLists.txt b/examples/vision/detection/paddledetection/rknpu2/cpp/CMakeLists.txt index 41db4b136..6ecb3d52c 100644 --- a/examples/vision/detection/paddledetection/rknpu2/cpp/CMakeLists.txt +++ b/examples/vision/detection/paddledetection/rknpu2/cpp/CMakeLists.txt @@ -14,3 +14,6 @@ target_link_libraries(infer_picodet_demo ${FASTDEPLOY_LIBS}) add_executable(infer_yolov8_demo ${PROJECT_SOURCE_DIR}/infer_yolov8_demo.cc) target_link_libraries(infer_yolov8_demo ${FASTDEPLOY_LIBS}) + +add_executable(infer_ppyoloe_demo ${PROJECT_SOURCE_DIR}/infer_ppyoloe_demo.cc) 
+target_link_libraries(infer_ppyoloe_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/detection/paddledetection/rknpu2/cpp/README_CN.md b/examples/vision/detection/paddledetection/rknpu2/cpp/README_CN.md index 18ddfa77a..b98f0632c 100644 --- a/examples/vision/detection/paddledetection/rknpu2/cpp/README_CN.md +++ b/examples/vision/detection/paddledetection/rknpu2/cpp/README_CN.md @@ -12,7 +12,7 @@ 以上步骤请参考[RK2代NPU部署库编译](../../../../../../docs/cn/build_and_install/rknpu2.md)实现 ```bash -以picodet为例进行推理部署 +# 以picodet为例进行推理部署 mkdir build cd build @@ -23,6 +23,8 @@ cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x make -j # 下载PPYOLOE模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/picodet_s_416_coco_lcnet.zip +unzip picodet_s_416_coco_lcnet.zip wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg # CPU推理 @@ -31,13 +33,6 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000 ./infer_picodet_demo ./picodet_s_416_coco_lcnet 000000014439.jpg 1 ``` -## 运行例程 - -```bash -cd ./build/install -./infer_picodet model/picodet_s_416_coco_lcnet images/000000014439.jpg -``` - ## 文档导航 - [模型介绍](../../) diff --git a/examples/vision/detection/paddledetection/rknpu2/cpp/infer_ppyoloe_demo.cc b/examples/vision/detection/paddledetection/rknpu2/cpp/infer_ppyoloe_demo.cc new file mode 100644 index 000000000..098c6a44e --- /dev/null +++ b/examples/vision/detection/paddledetection/rknpu2/cpp/infer_ppyoloe_demo.cc @@ -0,0 +1,95 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +void ONNXInfer(const std::string& model_dir, const std::string& image_file) { + std::string model_file = model_dir + "/yolov8_n_500e_coco.onnx"; + std::string params_file; + std::string config_file = model_dir + "/infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseCpu(); + auto format = fastdeploy::ModelFormat::ONNX; + + auto model = fastdeploy::vision::detection::PPYOLOE( + model_file, params_file, config_file, option, format); + + fastdeploy::TimeCounter tc; + tc.Start(); + auto im = cv::imread(image_file); + fastdeploy::vision::DetectionResult res; + if (!model.Predict(im, &res)) { + std::cerr << "Failed to predict." 
<< std::endl; + return; + } + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + tc.End(); + tc.PrintInfo("PPDet in ONNX"); + + std::cout << res.Str() << std::endl; + cv::imwrite("infer_onnx.jpg", vis_im); + std::cout << "Visualized result saved in ./infer_onnx.jpg" << std::endl; +} + +void RKNPU2Infer(const std::string& model_dir, const std::string& image_file) { + auto model_file = model_dir + "/ppyoloe_plus_crn_s_80e_coco_rk3588_quantized.rknn"; + auto params_file = ""; + auto config_file = model_dir + "/infer_cfg.yml"; + + auto option = fastdeploy::RuntimeOption(); + option.UseRKNPU2(); + + auto format = fastdeploy::ModelFormat::RKNN; + + auto model = fastdeploy::vision::detection::PPYOLOE( + model_file, params_file, config_file, option, format); + + model.GetPreprocessor().DisablePermute(); + model.GetPreprocessor().DisableNormalize(); + model.GetPostprocessor().ApplyDecodeAndNMS(); + + auto im = cv::imread(image_file); + + fastdeploy::vision::DetectionResult res; + fastdeploy::TimeCounter tc; + tc.Start(); + if (!model.Predict(&im, &res)) { + std::cerr << "Failed to predict." << std::endl; + return; + } + tc.End(); + tc.PrintInfo("PPDet in RKNPU2"); + + std::cout << res.Str() << std::endl; + auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5); + cv::imwrite("infer_rknpu2.jpg", vis_im); + std::cout << "Visualized result saved in ./infer_rknpu2.jpg" << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout + << "Usage: infer_demo path/to/model_dir path/to/image run_option, " + "e.g ./infer_model ./picodet_model_dir ./test.jpeg" + << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + ONNXInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + RKNPU2Infer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/detection/paddledetection/rknpu2/python/infer.py b/examples/vision/detection/paddledetection/rknpu2/python/infer.py index 2dfb54281..2617acd95 100644 --- a/examples/vision/detection/paddledetection/rknpu2/python/infer.py +++ b/examples/vision/detection/paddledetection/rknpu2/python/infer.py @@ -22,11 +22,11 @@ def parse_arguments(): parser = argparse.ArgumentParser() parser.add_argument( "--model_file", - default="./picodet_s_416_coco_lcnet_non_postprocess/picodet_xs_416_coco_lcnet.onnx", + default="./picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet_rk3588_unquantized.rknn", help="Path of rknn model.") parser.add_argument( "--config_file", - default="./picodet_s_416_coco_lcnet_non_postprocess/infer_cfg.yml", + default="./picodet_s_416_coco_lcnet/infer_cfg.yml", help="Path of config.") parser.add_argument( "--image", diff --git a/examples/vision/detection/rkyolo/README_CN.md b/examples/vision/detection/rkyolo/README_CN.md index 1cc44a920..982d0577f 100644 --- a/examples/vision/detection/rkyolo/README_CN.md +++ b/examples/vision/detection/rkyolo/README_CN.md @@ -6,11 +6,21 @@ RKYOLO参考[rknn_model_zoo](https://github.com/airockchip/rknn_model_zoo/tree/m ## 支持模型列表 +FastDeploy目前支持以下三个模型的部署: + * RKYOLOV5 +* RKYOLOX +* RKYOLOv7 -## 模型转换example +为了方便大家测试,我们提供了三个转换过后的模型,大家可以直接下载使用。 +如果你有转换模型的需求,请参考[RKNN_model_convert](https://github.com/airockchip/rknn_model_zoo/tree/main/models/CV/object_detection/yolo/RKNN_model_convert) + +| 模型名称 | 下载地址 | +|--------------------|---------------------------------------------------------------------| +| yolov5-s-relu-int8 | https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/yolov5-s-relu.zip | +| yolov7-tiny-int8 | 
https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/yolov7-tiny.zip | +| yolox-s-int8 | https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/yolox-s.zip | -请参考[RKNN_model_convert](https://github.com/airockchip/rknn_model_zoo/tree/main/models/CV/object_detection/yolo/RKNN_model_convert) ## 其他链接 From a79d17b8c616495552cb24245a959df490193cf9 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com> Date: Wed, 15 Feb 2023 17:59:36 +0800 Subject: [PATCH 24/41] [Bug Fix] Update pphumanseg rknpu2 example (#1255) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更新pphumanseg example格式 --- FastDeployCSharp.cmake | 3 +- .../rockchip/rknpu2/cpp/CMakeLists.txt | 32 ++-------- .../paddleseg/rockchip/rknpu2/cpp/README.md | 59 ++++--------------- .../paddleseg/rockchip/rknpu2/cpp/infer.cc | 22 ++++--- 4 files changed, 30 insertions(+), 86 deletions(-) diff --git a/FastDeployCSharp.cmake b/FastDeployCSharp.cmake index 3a3708140..43f798b36 100644 --- a/FastDeployCSharp.cmake +++ b/FastDeployCSharp.cmake @@ -1,4 +1,4 @@ -list(APPEND FASTDEPLOY_DOTNET_REFERENCES +list(APPEND FASTDEPLOY_DOTNET_REFERENCES "Microsoft.CSharp" "System" "System.Core" @@ -11,4 +11,3 @@ list(APPEND FASTDEPLOY_DOTNET_REFERENCES "${CMAKE_CURRENT_LIST_DIR}/csharp_lib/fastdeploy_csharp.dll") set(FASTDEPLOY_PACKAGE_REFERENCES "OpenCvSharp4_4.7.0.20230115;OpenCvSharp4.runtime.win_4.7.0.20230115") - diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/CMakeLists.txt b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/CMakeLists.txt index 7fc1f6e22..b723e4691 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/CMakeLists.txt +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/CMakeLists.txt @@ -1,36 +1,12 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.10) -project(rknpu_test) +project(infer_demo) set(CMAKE_CXX_STANDARD 14) # 指定下载解压后的fastdeploy库路径 -set(FASTDEPLOY_INSTALL_DIR "thirdpartys/fastdeploy-0.0.3") +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake) include_directories(${FastDeploy_INCLUDE_DIRS}) -add_executable(rknpu_test infer.cc) -target_link_libraries(rknpu_test - ${FastDeploy_LIBS} - ) - -set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install) - -install(TARGETS rknpu_test DESTINATION ./) - -install(DIRECTORY model DESTINATION ./) -install(DIRECTORY images DESTINATION ./) - -file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*) -message("${FASTDEPLOY_LIBS}") -install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib) - -file(GLOB ONNXRUNTIME_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/onnxruntime/lib/*) -install(PROGRAMS ${ONNXRUNTIME_LIBS} DESTINATION lib) - -install(DIRECTORY ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib DESTINATION ./) - -file(GLOB PADDLETOONNX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddle2onnx/lib/*) -install(PROGRAMS ${PADDLETOONNX_LIBS} DESTINATION lib) - -file(GLOB RKNPU2_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/*) -install(PROGRAMS ${RKNPU2_LIBS} DESTINATION lib) +add_executable(infer_demo infer.cc) +target_link_libraries(infer_demo ${FastDeploy_LIBS}) \ No newline at end of file diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md index b6d4c18bf..41da93579 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md +++ 
b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/README.md @@ -10,67 +10,30 @@ 以上步骤请参考[RK2代NPU部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)实现 -## 生成基本目录文件 +## 转换模型 -该例程由以下几个部分组成 -```text -. -├── CMakeLists.txt -├── build # 编译文件夹 -├── image # 存放图片的文件夹 -├── infer_cpu_npu.cc -├── infer_cpu_npu.h -├── main.cc -├── model # 存放模型文件的文件夹 -└── thirdpartys # 存放sdk的文件夹 -``` +模型转换代码请参考[模型转换文档](../README_CN.md) -首先需要先生成目录结构 -```bash -mkdir build -mkdir images -mkdir model -mkdir thirdpartys -``` +## 编译SDK -## 编译 - -### 编译并拷贝SDK到thirdpartys文件夹 - -请参考[RK2代NPU部署库编译](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)仓库编译SDK,编译完成后,将在build目录下生成fastdeploy-x-x-x目录,请移动它至thirdpartys目录下. - -### 拷贝模型文件,以及配置文件至model文件夹 -在Paddle动态图模型 -> Paddle静态图模型 -> ONNX模型的过程中,将生成ONNX文件以及对应的yaml配置文件,请将配置文件存放到model文件夹内。 -转换为RKNN后的模型文件也需要拷贝至model,输入以下命令下载使用(模型文件为RK3588,RK3568需要重新[转换PPSeg RKNN模型](../README.md))。 - -### 准备测试图片至image文件夹 -```bash -wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip -unzip -qo images.zip -``` +请参考[RK2代NPU部署库编译](../../../../../../docs/cn/faq/rknpu2/build.md)编译SDK. ### 编译example ```bash -cd build -cmake .. +mkdir build && cd build +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x make -j8 -make install -``` -## 运行例程 +wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip +unzip -qo images.zip -```bash -cd ./build/install -./rknpu_test model/Portrait_PP_HumanSegV2_Lite_256x144_infer/ images/portrait_heng.jpg +./infer_demo model/Portrait_PP_HumanSegV2_Lite_256x144_infer/ images/portrait_heng.jpg ``` ## 注意事项 -RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作会在转RKNN模型时,内嵌到模型中,因此我们在使用FastDeploy部署时,需要先调用DisableNormalizeAndPermute(C++)或`disable_normalize_and_permute(Python),在预处理阶段禁用归一化以及数据格式的转换 +RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作会在转RKNN模型时,内嵌到模型中,因此我们在使用FastDeploy部署时,需要先调用DisableNormalizeAndPermute(C++)或`disable_normalize_and_permute(Python),在预处理阶段禁用归一化以及数据格式的转换。 -## 快速链接 -- [FastDeploy部署PaddleSeg模型概览](../../) +- [模型介绍](../../) - [Python部署](../python) - [转换PPSeg RKNN模型文档](../README.md) -- [PaddleSeg C++ API文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1segmentation.html) -) diff --git a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/infer.cc b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/infer.cc index b501fc899..42b38ba30 100644 --- a/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/infer.cc +++ b/examples/vision/segmentation/paddleseg/rockchip/rknpu2/cpp/infer.cc @@ -16,8 +16,7 @@ #include "fastdeploy/vision.h" void ONNXInfer(const std::string& model_dir, const std::string& image_file) { - std::string model_file = - model_dir + "/Portrait_PP_HumanSegV2_Lite_256x144_infer.onnx"; + std::string model_file = model_dir + "/Portrait_PP_HumanSegV2_Lite_256x144_infer.onnx"; std::string params_file; std::string config_file = model_dir + "/deploy.yaml"; auto option = fastdeploy::RuntimeOption(); @@ -44,12 +43,13 @@ void ONNXInfer(const std::string& model_dir, const std::string& image_file) { tc.PrintInfo("PPSeg in ONNX"); cv::imwrite("infer_onnx.jpg", vis_im); - std::cout << "Visualized result saved in ./infer_onnx.jpg" << std::endl; + std::cout + << "Visualized result saved in ./infer_onnx.jpg" + << std::endl; } void RKNPU2Infer(const std::string& model_dir, const std::string& image_file) { - std::string model_file = - model_dir + "/Portrait_PP_HumanSegV2_Lite_256x144_infer_rk3588.rknn"; + std::string 
model_file = model_dir + "/Portrait_PP_HumanSegV2_Lite_256x144_infer_rk3588.rknn"; std::string params_file; std::string config_file = model_dir + "/deploy.yaml"; auto option = fastdeploy::RuntimeOption(); @@ -78,7 +78,9 @@ void RKNPU2Infer(const std::string& model_dir, const std::string& image_file) { tc.PrintInfo("PPSeg in RKNPU2"); cv::imwrite("infer_rknn.jpg", vis_im); - std::cout << "Visualized result saved in ./infer_rknn.jpg" << std::endl; + std::cout + << "Visualized result saved in ./infer_rknn.jpg" + << std::endl; } int main(int argc, char* argv[]) { @@ -90,7 +92,11 @@ int main(int argc, char* argv[]) { return -1; } - RKNPU2Infer(argv[1], argv[2]); - // ONNXInfer(argv[1], argv[2]); + if (std::atoi(argv[3]) == 0) { + ONNXInfer(argv[1], argv[2]); + } else if (std::atoi(argv[3]) == 1) { + RKNPU2Infer(argv[1], argv[2]); + } return 0; } + From c948b72e6c5285dbd50b2a90306ecfdf1eda9fbf Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 15 Feb 2023 18:08:02 +0800 Subject: [PATCH 25/41] =?UTF-8?q?=E6=9B=B4=E6=96=B0PPHumanSeg=20Example?= =?UTF-8?q?=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/cn/faq/rknpu2/rknpu2.md | 37 +++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/docs/cn/faq/rknpu2/rknpu2.md b/docs/cn/faq/rknpu2/rknpu2.md index 99554e5ba..1426e0cd3 100644 --- a/docs/cn/faq/rknpu2/rknpu2.md +++ b/docs/cn/faq/rknpu2/rknpu2.md @@ -8,25 +8,28 @@ RKNPU2模型导出只支持在x86Linux平台上进行导出,安装流程请参 ONNX模型不能直接调用RK芯片中的NPU进行运算,需要把ONNX模型转换为RKNN模型,具体流程请查看[RKNPU2转换文档](./export.md) ## RKNPU2已经支持的模型列表 -以下环境测试的速度均为端到端,测试环境如下: -* 设备型号: RK3588 -* ARM CPU使用ONNX框架进行测试 + +FastDeploy在RK3588s上进行了测试,测试环境如下: + +* 设备型号: RK3588-s * NPU均使用单核进行测试 -| 任务场景 | 模型 | 模型版本(表示已经测试的版本) | ARM CPU/RKNN速度(ms) | -|----------------------|--------------------------------------------------------------------------------------------------|--------------------------|--------------------| -| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 162/112 | -| Detection | [PaddleDetection Yolov8](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | yolov8-n | -/100 | -| Detection | [PPYOLOE](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | ppyoloe-s(int8) | -/77 | -| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | -/57 | -| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | -| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | -| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | -/- | -| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 133/43 | -| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 | -| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 | -| Face FaceRecognition | [InsightFace](../../../../examples/vision/faceid/insightface/rknpu2/README_CN.md) | ms1mv3_arcface_r18(int8) | 81/12 | -| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 | +以下环境测试的速度均为端到端测试速度根据芯片体质的不同,速度会上下有所浮动,仅供参考。 + +| 任务场景 | 模型及其example | 模型版本 | 是否量化 | RKNN速度(ms) | 
+|----------------------|--------------------------------------------------------------------------------------------------|--------------------------|------|------------| +| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | 否 | 33 | +| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 否 | 112 | +| Detection | [PaddleDetection Yolov8](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | yolov8-n | 否 | 100 | +| Detection | [PPYOLOE](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | ppyoloe-s(int8) | 是 | 141 | +| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | 是 | 57 | +| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | yolox-s | 是 | 130 | +| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | yolov7-tiny | 是 | 58 | +| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | 否 | - | +| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 是 | 43 | +| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 是 | 43 | +| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 是 | 42 | +| Face FaceRecognition | [InsightFace](../../../../examples/vision/faceid/insightface/rknpu2/README_CN.md) | ms1mv3_arcface_r18(int8) | 是 | 12 | ## 预编译库下载 From 5f667b5a0c894429fb84a810bb1ca196c77567bd Mon Sep 17 00:00:00 2001 From: leiqing <54695910+leiqing1@users.noreply.github.com> Date: Wed, 15 Feb 2023 18:52:00 +0800 Subject: [PATCH 26/41] Update README_CN.md --- docs/README_CN.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/README_CN.md b/docs/README_CN.md index 227db0407..e0f7584f2 100755 --- a/docs/README_CN.md +++ b/docs/README_CN.md @@ -13,6 +13,7 @@ - [瑞芯微RK3588、RK356X部署环境编译安装](cn/build_and_install/rknpu2.md) - [晶晨A311D部署环境编译安装](cn/build_and_install/a311d.md) - [华为昇腾部署环境编译安装](cn/build_and_install/huawei_ascend.md) +- [算能TPU部署环境编译安装](cn/build_and_install/sophgo.md) - [Jetson部署环境编译安装](cn/build_and_install/jetson.md) - [Android平台部署环境编译安装](cn/build_and_install/android.md) - [服务化部署镜像编译安装](../serving/docs/zh_CN/compile.md) From bdfb7b00088c57d4ad55d68de366bb09c81562b9 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com> Date: Wed, 15 Feb 2023 19:18:34 +0800 Subject: [PATCH 27/41] [Doc] Update RKNPU2 Docs (#1337) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 更新PPHumanSeg Example代码 --- docs/cn/faq/rknpu2/rknpu2.md | 37 +++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/docs/cn/faq/rknpu2/rknpu2.md b/docs/cn/faq/rknpu2/rknpu2.md index 99554e5ba..1426e0cd3 100644 --- a/docs/cn/faq/rknpu2/rknpu2.md +++ b/docs/cn/faq/rknpu2/rknpu2.md @@ -8,25 +8,28 @@ RKNPU2模型导出只支持在x86Linux平台上进行导出,安装流程请参 ONNX模型不能直接调用RK芯片中的NPU进行运算,需要把ONNX模型转换为RKNN模型,具体流程请查看[RKNPU2转换文档](./export.md) ## RKNPU2已经支持的模型列表 -以下环境测试的速度均为端到端,测试环境如下: -* 设备型号: RK3588 -* ARM CPU使用ONNX框架进行测试 + +FastDeploy在RK3588s上进行了测试,测试环境如下: + +* 设备型号: RK3588-s * NPU均使用单核进行测试 -| 任务场景 | 模型 | 模型版本(表示已经测试的版本) | ARM CPU/RKNN速度(ms) | 
-|----------------------|--------------------------------------------------------------------------------------------------|--------------------------|--------------------| -| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 162/112 | -| Detection | [PaddleDetection Yolov8](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | yolov8-n | -/100 | -| Detection | [PPYOLOE](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | ppyoloe-s(int8) | -/77 | -| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | -/57 | -| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | -| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | - | -/- | -| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | -/- | -| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 133/43 | -| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 133/43 | -| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 108/42 | -| Face FaceRecognition | [InsightFace](../../../../examples/vision/faceid/insightface/rknpu2/README_CN.md) | ms1mv3_arcface_r18(int8) | 81/12 | -| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | -/33 | +以下环境测试的速度均为端到端测试速度根据芯片体质的不同,速度会上下有所浮动,仅供参考。 + +| 任务场景 | 模型及其example | 模型版本 | 是否量化 | RKNN速度(ms) | +|----------------------|--------------------------------------------------------------------------------------------------|--------------------------|------|------------| +| Classification | [ResNet](../../../../examples/vision/classification/paddleclas/rknpu2/README.md) | ResNet50_vd | 否 | 33 | +| Detection | [Picodet](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | Picodet-s | 否 | 112 | +| Detection | [PaddleDetection Yolov8](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | yolov8-n | 否 | 100 | +| Detection | [PPYOLOE](../../../../examples/vision/detection/paddledetection/rknpu2/README.md) | ppyoloe-s(int8) | 是 | 141 | +| Detection | [RKYOLOV5](../../../../examples/vision/detection/rkyolo/README.md) | YOLOV5-S-Relu(int8) | 是 | 57 | +| Detection | [RKYOLOX](../../../../examples/vision/detection/rkyolo/README.md) | yolox-s | 是 | 130 | +| Detection | [RKYOLOV7](../../../../examples/vision/detection/rkyolo/README.md) | yolov7-tiny | 是 | 58 | +| Segmentation | [Unet](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | Unet-cityscapes | 否 | - | +| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | portrait(int8) | 是 | 43 | +| Segmentation | [PP-HumanSegV2Lite](../../../../examples/vision/segmentation/paddleseg/rknpu2/README.md) | human(int8) | 是 | 43 | +| Face Detection | [SCRFD](../../../../examples/vision/facedet/scrfd/rknpu2/README.md) | SCRFD-2.5G-kps-640(int8) | 是 | 42 | +| Face FaceRecognition | [InsightFace](../../../../examples/vision/faceid/insightface/rknpu2/README_CN.md) | ms1mv3_arcface_r18(int8) | 是 | 12 | ## 预编译库下载 From 4ccfbead16524411c50d9bae999348e415e4003d Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 15 Feb 2023 19:25:44 +0800 Subject: [PATCH 
28/41] =?UTF-8?q?=E6=9B=B4=E6=96=B0Preprocess=E4=BB=A3?= =?UTF-8?q?=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../keypointdet/pptinypose/pptinypose.cc | 19 ++++++++++++------- .../keypointdet/pptinypose/pptinypose.h | 18 +++++++++++++++++- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc index a6de59c9e..d3668f8a5 100644 --- a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc +++ b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc @@ -21,6 +21,7 @@ PPTinyPose::PPTinyPose(const std::string& model_file, Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_kunlunxin_backends = {Backend::LITE}; + valid_rknpu_backends = {Backend::RKNPU2}; runtime_option = custom_option; runtime_option.model_format = model_format; runtime_option.model_file = model_file; @@ -66,14 +67,18 @@ bool PPTinyPose::BuildPreprocessPipelineFromConfig() { for (const auto& op : cfg["Preprocess"]) { std::string op_name = op["type"].as(); if (op_name == "NormalizeImage") { - auto mean = op["mean"].as>(); - auto std = op["std"].as>(); - bool is_scale = op["is_scale"].as(); - processors_.push_back(std::make_shared(mean, std, is_scale)); + if (!disable_normalize_) { + auto mean = op["mean"].as>(); + auto std = op["std"].as>(); + bool is_scale = op["is_scale"].as(); + processors_.push_back(std::make_shared(mean, std, is_scale)); + } } else if (op_name == "Permute") { - // permute = cast + HWC2CHW - processors_.push_back(std::make_shared("float")); - processors_.push_back(std::make_shared()); + if (!disable_permute_) { + // permute = cast + HWC2CHW + processors_.push_back(std::make_shared("float")); + processors_.push_back(std::make_shared()); + } } else if (op_name == "TopDownEvalAffine") { auto trainsize = op["trainsize"].as>(); int height = trainsize[1]; diff --git a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.h b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.h index bfa8a8e60..df114b2bd 100644 --- a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.h +++ b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.h @@ -30,7 +30,7 @@ namespace keypointdetection { */ class FASTDEPLOY_DECL PPTinyPose : public FastDeployModel { public: - /** \brief Set path of model file and configuration file, and the configuration of runtime + /** \brief Set path of model file and configuration file, and the configuration of runtime * * \param[in] model_file Path of model file, e.g pptinypose/model.pdmodel * \param[in] params_file Path of parameter file, e.g pptinypose/model.pdiparams, if the model format is ONNX, this parameter will be ignored @@ -68,6 +68,18 @@ class FASTDEPLOY_DECL PPTinyPose : public FastDeployModel { */ bool use_dark = true; + /// This function will disable normalize in preprocessing step. + void DisableNormalize() { + disable_normalize_ = true; + BuildPreprocessPipelineFromConfig(); + } + + /// This function will disable hwc2chw in preprocessing step. 
+ void DisablePermute() { + disable_permute_ = true; + BuildPreprocessPipelineFromConfig(); + } + protected: bool Initialize(); /// Build the preprocess pipeline from the loaded model @@ -84,6 +96,10 @@ class FASTDEPLOY_DECL PPTinyPose : public FastDeployModel { private: std::vector> processors_; std::string config_file_; + // for recording the switch of hwc2chw + bool disable_permute_ = false; + // for recording the switch of normalize + bool disable_normalize_ = false; }; } // namespace keypointdetection } // namespace vision From f900199c02b088ae4005588064b72d81ae8af14b Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 15 Feb 2023 19:53:48 +0800 Subject: [PATCH 29/41] =?UTF-8?q?=E6=9B=B4=E6=96=B0example=20=E5=92=8C?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E8=BD=AC=E6=8D=A2=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tiny_pose/rknpu2/README.md | 55 ++++++++++++ .../tiny_pose/rknpu2/cpp/CMakeLists.txt | 13 +++ .../tiny_pose/rknpu2/cpp/README.md | 85 +++++++++++++++++++ .../tiny_pose/rknpu2/cpp/pptinypose_infer.cc | 70 +++++++++++++++ .../paddleseg/sophgo/python/README.md | 2 +- .../PP_TinyPose_256x192_unquantized.yaml | 15 ++++ 6 files changed, 239 insertions(+), 1 deletion(-) create mode 100644 examples/vision/keypointdetection/tiny_pose/rknpu2/README.md create mode 100644 examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/CMakeLists.txt create mode 100644 examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/README.md create mode 100755 examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc create mode 100644 tools/rknpu2/config/PP_TinyPose_256x192_unquantized.yaml diff --git a/examples/vision/keypointdetection/tiny_pose/rknpu2/README.md b/examples/vision/keypointdetection/tiny_pose/rknpu2/README.md new file mode 100644 index 000000000..f7f270c0a --- /dev/null +++ b/examples/vision/keypointdetection/tiny_pose/rknpu2/README.md @@ -0,0 +1,55 @@ +[English](README.md) | 简体中文 +# PP-TinyPose RKNPU2部署示例 + +## 模型版本说明 + +- [PaddleDetection release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5) + +目前FastDeploy支持如下模型的部署 + +- [PP-TinyPose系列模型](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose/README.md) + +## 准备PP-TinyPose部署模型 + +PP-TinyPose模型导出,请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/deploy/EXPORT_MODEL.md) + +**注意**:PP-TinyPose导出的模型包含`model.pdmodel`、`model.pdiparams`和`infer_cfg.yml`三个文件,FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息。 + +## 模型转换example + +### Paddle模型转换为ONNX模型 + +由于Rockchip提供的rknn-toolkit2工具暂时不支持Paddle模型直接导出为RKNN模型,因此需要先将Paddle模型导出为ONNX模型,再将ONNX模型转为RKNN模型。 + +```bash +# 下载Paddle静态图模型并解压 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz +tar -xvf PP_TinyPose_256x192_infer.tgz + +# 静态图转ONNX模型,注意,这里的save_file请和压缩包名对齐 +paddle2onnx --model_dir PP_TinyPose_256x192_infer \ + --model_filename model.pdmodel \ + --params_filename model.pdiparams \ + --save_file PP_TinyPose_256x192_infer/PP_TinyPose_256x192_infer.onnx \ + --enable_dev_version True + +# 固定shape +python -m paddle2onnx.optimize --input_model PP_TinyPose_256x192_infer/PP_TinyPose_256x192_infer.onnx \ + --output_model PP_TinyPose_256x192_infer/PP_TinyPose_256x192_infer.onnx \ + --input_shape_dict "{'image':[1,3,256,192]}" +``` + +### ONNX模型转RKNN模型 + +为了方便大家使用,我们提供了python脚本,通过我们预配置的config文件,你将能够快速地转换ONNX模型到RKNN模型 + +```bash +python tools/rknpu2/export.py --config_path 
tools/rknpu2/config/PP_TinyPose_256x192_unquantized.yaml \ + --target_platform rk3588 +``` + +## 详细部署文档 + +- [模型详细介绍](../README_CN.md) +- [Python部署](python) +- [C++部署](cpp) \ No newline at end of file diff --git a/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/CMakeLists.txt b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/CMakeLists.txt new file mode 100644 index 000000000..0f492f44b --- /dev/null +++ b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/CMakeLists.txt @@ -0,0 +1,13 @@ +PROJECT(infer_demo C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.12) + +# 指定下载解压后的fastdeploy库路径 +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +# 添加FastDeploy依赖头文件 +include_directories(${FASTDEPLOY_INCS}) + +add_executable(infer_tinypose_demo ${PROJECT_SOURCE_DIR}/pptinypose_infer.cc) +target_link_libraries(infer_tinypose_demo ${FASTDEPLOY_LIBS}) diff --git a/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/README.md b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/README.md new file mode 100644 index 000000000..547229504 --- /dev/null +++ b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/README.md @@ -0,0 +1,85 @@ +[English](README.md) | 简体中文 +# PP-TinyPose C++部署示例 + +本目录下提供`pptinypose_infer.cc`快速完成PP-TinyPose通过NPU加速部署的`单图单人关键点检测`示例 +>> **注意**: PP-Tinypose单模型目前只支持单图单人关键点检测,因此输入的图片应只包含一个人或者进行过裁剪的图像。多人关键点检测请参考[PP-TinyPose Pipeline](../../../det_keypoint_unite/cpp/README.md) + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + + +以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.3以上(x.x.x>=1.0.3) + +```bash +mkdir build +cd build +# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 +wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz +tar xvf fastdeploy-linux-x64-x.x.x.tgz +cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载PP-TinyPose模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz +tar -xvf PP_TinyPose_256x192_infer.tgz +wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg + + +# CPU推理 +./infer_tinypose_demo PP_TinyPose_256x192_infer hrnet_demo.jpg +``` + +运行完成可视化结果如下图所示 +
+ +
+ +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +## PP-TinyPose C++接口 + +### PP-TinyPose类 + +```c++ +fastdeploy::vision::keypointdetection::PPTinyPose( + const string& model_file, + const string& params_file = "", + const string& config_file, + const RuntimeOption& runtime_option = RuntimeOption(), + const ModelFormat& model_format = ModelFormat::PADDLE) +``` + +PPTinyPose模型加载和初始化,其中model_file为导出的Paddle模型格式。 + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **config_file**(str): 推理部署配置文件 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + +#### Predict函数 + +> ```c++ +> PPTinyPose::Predict(cv::Mat* im, KeyPointDetectionResult* result) +> ``` +> +> 模型预测接口,输入图像直接输出关键点检测结果。 +> +> **参数** +> +> > * **im**: 输入图像,注意需为HWC,BGR格式 +> > * **result**: 关键点检测结果,包括关键点的坐标以及关键点对应的概率值, KeyPointDetectionResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/) + +### 类成员属性 +#### 后处理参数 +> > * **use_dark**(bool): 是否使用DARK进行后处理[参考论文](https://arxiv.org/abs/1910.06278) + +- [模型介绍](../../) +- [Python部署](../python) +- [视觉模型预测结果](../../../../../docs/api/vision_results/) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc new file mode 100755 index 000000000..3f8f66eb4 --- /dev/null +++ b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/vision.h" + +void RKNPU2Infer(const std::string& tinypose_model_dir, + const std::string& image_file) { + auto tinypose_model_file = + tinypose_model_dir + "/picodet_s_416_coco_lcnet_rk3588.rknn"; + auto tinypose_params_file = ""; + auto tinypose_config_file = tinypose_model_dir + "infer_cfg.yml"; + auto option = fastdeploy::RuntimeOption(); + option.UseRKNPU2(); + auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose( + tinypose_model_file, tinypose_params_file, tinypose_config_file, option); + + if (!tinypose_model.Initialized()) { + std::cerr << "TinyPose Model Failed to initialize." << std::endl; + return; + } + + tinypose_model.DisablePermute(); + tinypose_model.DisableNormalize(); + + auto im = cv::imread(image_file); + fastdeploy::vision::KeyPointDetectionResult res; + if (!tinypose_model.Predict(&im, &res)) { + std::cerr << "TinyPose Prediction Failed." << std::endl; + return; + } else { + std::cout << "TinyPose Prediction Done!" 
<< std::endl; + } + + std::cout << res.Str() << std::endl; + + auto tinypose_vis_im = fastdeploy::vision::VisKeypointDetection(im, res, 0.5); + cv::imwrite("tinypose_vis_result.jpg", tinypose_vis_im); + std::cout << "TinyPose visualized result saved in ./tinypose_vis_result.jpg" + << std::endl; +} + +int main(int argc, char* argv[]) { + if (argc < 4) { + std::cout << "Usage: infer_demo path/to/pptinypose_model_dir path/to/image " + "run_option, " + "e.g ./infer_model ./pptinypose_model_dir ./test.jpeg 0" + << std::endl; + std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " + "with gpu; 2: run with gpu and use tensorrt backend; 3: run " + "with kunlunxin." + << std::endl; + return -1; + } + + if (std::atoi(argv[3]) == 0) { + RKNPU2Infer(argv[1], argv[2]); + } + return 0; +} diff --git a/examples/vision/segmentation/paddleseg/sophgo/python/README.md b/examples/vision/segmentation/paddleseg/sophgo/python/README.md index e646d6a90..55abb90f7 100644 --- a/examples/vision/segmentation/paddleseg/sophgo/python/README.md +++ b/examples/vision/segmentation/paddleseg/sophgo/python/README.md @@ -16,7 +16,7 @@ cd path/to/paddleseg/sophgo/python wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png # PaddleSeg模型转换为bmodel模型 -将Paddle模型转换为SOPHGO bmodel模型,转换步骤参考[文档](../README_CN.md#将paddleseg推理模型转换为bmodel模型步骤) +将Paddle模型转换为SOPHGO bmodel模型,转换步骤参考[文档](../README.md#将paddleseg推理模型转换为bmodel模型步骤) # 推理 python3 infer.py --model_file ./bmodel/pp_liteseg_1684x_f32.bmodel --config_file ./bmodel/deploy.yaml --image cityscapes_demo.png diff --git a/tools/rknpu2/config/PP_TinyPose_256x192_unquantized.yaml b/tools/rknpu2/config/PP_TinyPose_256x192_unquantized.yaml new file mode 100644 index 000000000..28c2ab943 --- /dev/null +++ b/tools/rknpu2/config/PP_TinyPose_256x192_unquantized.yaml @@ -0,0 +1,15 @@ +mean: + - + - 123.675 + - 116.28 + - 103.53 +std: + - + - 58.395 + - 57.12 + - 57.375 +model_path: ./PP_TinyPose_256x192_infer/PP_TinyPose_256x192_infer.onnx +outputs_nodes: ['conv2d_441.tmp_1'] +do_quantization: False +dataset: +output_folder: "./PP_TinyPose_256x192_infer" From 53333c5db6a1311f605c4c283224d62ecd513b19 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 15 Feb 2023 20:37:42 +0800 Subject: [PATCH 30/41] =?UTF-8?q?=E6=9B=B4=E6=96=B0example=20=E5=92=8C?= =?UTF-8?q?=E6=A8=A1=E5=9E=8B=E8=BD=AC=E6=8D=A2=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tiny_pose/rknpu2/cpp/README.md | 16 +++++++--------- .../tiny_pose/rknpu2/cpp/pptinypose_infer.cc | 19 +++++++------------ .../keypointdet/pptinypose/pptinypose.cc | 12 ++++++++++++ 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/README.md b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/README.md index 547229504..843ebe682 100644 --- a/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/README.md +++ b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/README.md @@ -15,9 +15,7 @@ ```bash mkdir build cd build -# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz + cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x make -j @@ -27,8 +25,8 @@ tar -xvf PP_TinyPose_256x192_infer.tgz wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg -# CPU推理 -./infer_tinypose_demo PP_TinyPose_256x192_infer hrnet_demo.jpg +# NPU推理 +sudo ./infer_tinypose_demo ./PP_TinyPose_256x192_infer ./hrnet_demo.jpg ``` 运行完成可视化结果如下图所示 @@ -79,7 +77,7 @@ PPTinyPose模型加载和初始化,其中model_file为导出的Paddle模型格 #### 后处理参数 > > * **use_dark**(bool): 是否使用DARK进行后处理[参考论文](https://arxiv.org/abs/1910.06278) -- [模型介绍](../../) -- [Python部署](../python) -- [视觉模型预测结果](../../../../../docs/api/vision_results/) -- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) +- [模型介绍](../../../) +- [Python部署](../../python) +- [视觉模型预测结果](../../../../../../docs/api/vision_results/) +- [如何切换模型推理后端引擎](../../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc index 3f8f66eb4..f0b0a263e 100755 --- a/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc +++ b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc @@ -17,13 +17,14 @@ void RKNPU2Infer(const std::string& tinypose_model_dir, const std::string& image_file) { auto tinypose_model_file = - tinypose_model_dir + "/picodet_s_416_coco_lcnet_rk3588.rknn"; + tinypose_model_dir + "/PP_TinyPose_256x192_infer_rk3588_unquantized.rknn"; auto tinypose_params_file = ""; - auto tinypose_config_file = tinypose_model_dir + "infer_cfg.yml"; + auto tinypose_config_file = tinypose_model_dir + "/infer_cfg.yml"; auto option = fastdeploy::RuntimeOption(); option.UseRKNPU2(); auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose( - tinypose_model_file, tinypose_params_file, tinypose_config_file, option); + tinypose_model_file, tinypose_params_file, tinypose_config_file, option, + fastdeploy::RKNN); if (!tinypose_model.Initialized()) { std::cerr << "TinyPose Model Failed to initialize." << std::endl; @@ -51,20 +52,14 @@ void RKNPU2Infer(const std::string& tinypose_model_dir, } int main(int argc, char* argv[]) { - if (argc < 4) { + if (argc < 3) { std::cout << "Usage: infer_demo path/to/pptinypose_model_dir path/to/image " "run_option, " - "e.g ./infer_model ./pptinypose_model_dir ./test.jpeg 0" - << std::endl; - std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " - "with gpu; 2: run with gpu and use tensorrt backend; 3: run " - "with kunlunxin." + "e.g ./infer_model ./pptinypose_model_dir ./test.jpeg" << std::endl; return -1; } - if (std::atoi(argv[3]) == 0) { - RKNPU2Infer(argv[1], argv[2]); - } + RKNPU2Infer(argv[1], argv[2]); return 0; } diff --git a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc index d3668f8a5..598d8ae2b 100644 --- a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc +++ b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc @@ -139,6 +139,18 @@ bool PPTinyPose::Postprocess(std::vector& infer_result, "Only support batch = 1 in FastDeploy now."); result->Clear(); + if (infer_result.size() == 1) { + FDTensor result_copy = infer_result[0]; + std::cout << "Reshape result_copy!" << std::endl; + result_copy.Reshape({result_copy.shape[0], result_copy.shape[1], + result_copy.shape[2] * result_copy.shape[3]}); + std::cout << "Resize infer_result!" << std::endl; + infer_result.resize(2); + std::cout << "Do ArgMax!" 
<< std::endl; + function::ArgMax(result_copy,&infer_result[1],-1); + std::cout << "Done!" << std::endl; + } + // Calculate output length int outdata_size = std::accumulate(infer_result[0].shape.begin(), From 2b1631b563e2a9528c1b37962e3194fb4d65edb0 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 15 Feb 2023 20:55:26 +0800 Subject: [PATCH 31/41] =?UTF-8?q?=E6=9B=B4=E6=96=B0pptinypose=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tiny_pose/rknpu2/cpp/pptinypose_infer.cc | 1 + fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc index f0b0a263e..d3c7f7b8c 100755 --- a/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc +++ b/examples/vision/keypointdetection/tiny_pose/rknpu2/cpp/pptinypose_infer.cc @@ -18,6 +18,7 @@ void RKNPU2Infer(const std::string& tinypose_model_dir, const std::string& image_file) { auto tinypose_model_file = tinypose_model_dir + "/PP_TinyPose_256x192_infer_rk3588_unquantized.rknn"; + std::cout << tinypose_model_file << std::endl; auto tinypose_params_file = ""; auto tinypose_config_file = tinypose_model_dir + "/infer_cfg.yml"; auto option = fastdeploy::RuntimeOption(); diff --git a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc index 598d8ae2b..f4b3bfc6c 100644 --- a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc +++ b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc @@ -135,10 +135,12 @@ bool PPTinyPose::Postprocess(std::vector& infer_result, KeyPointDetectionResult* result, const std::vector& center, const std::vector& scale) { - FDASSERT(infer_result[1].shape[0] == 1, + FDASSERT(infer_result[0].shape[0] == 1, "Only support batch = 1 in FastDeploy now."); result->Clear(); + std::cout << "Postprocess" << std::endl; + std::cout << "infer_result.size() is " << infer_result.size() << std::endl; if (infer_result.size() == 1) { FDTensor result_copy = infer_result[0]; std::cout << "Reshape result_copy!" << std::endl; @@ -206,12 +208,14 @@ bool PPTinyPose::Predict(cv::Mat* im, KeyPointDetectionResult* result) { << ModelName() << "." << std::endl; return false; } + std::vector infer_result; if (!Infer(processed_data, &infer_result)) { FDERROR << "Failed to inference while using model:" << ModelName() << "." << std::endl; return false; } + if (!Postprocess(infer_result, result, center, scale)) { FDERROR << "Failed to postprocess while using model:" << ModelName() << "." 
<< std::endl; From 8c42b708f6b17d5c36f5eb35d9dea56d6983fd23 Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 15 Feb 2023 21:01:06 +0800 Subject: [PATCH 32/41] =?UTF-8?q?=E5=88=A0=E9=99=A4=E6=97=A0=E7=94=A8?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=EF=BC=8C=E6=9B=B4=E6=96=B0python=E8=84=9A?= =?UTF-8?q?=E6=9C=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tiny_pose/rknpu2/README.md | 4 +- .../tiny_pose/rknpu2/python/README_CN.md | 70 +++++++++++++++++++ .../rknpu2/python/pptinypose_infer.py | 50 +++++++++++++ .../keypointdet/pptinypose/pptinypose.cc | 6 -- 4 files changed, 122 insertions(+), 8 deletions(-) create mode 100644 examples/vision/keypointdetection/tiny_pose/rknpu2/python/README_CN.md create mode 100755 examples/vision/keypointdetection/tiny_pose/rknpu2/python/pptinypose_infer.py diff --git a/examples/vision/keypointdetection/tiny_pose/rknpu2/README.md b/examples/vision/keypointdetection/tiny_pose/rknpu2/README.md index f7f270c0a..100234ac8 100644 --- a/examples/vision/keypointdetection/tiny_pose/rknpu2/README.md +++ b/examples/vision/keypointdetection/tiny_pose/rknpu2/README.md @@ -51,5 +51,5 @@ python tools/rknpu2/export.py --config_path tools/rknpu2/config/PP_TinyPose_256x ## 详细部署文档 - [模型详细介绍](../README_CN.md) -- [Python部署](python) -- [C++部署](cpp) \ No newline at end of file +- [Python部署](./python) +- [C++部署](./cpp) \ No newline at end of file diff --git a/examples/vision/keypointdetection/tiny_pose/rknpu2/python/README_CN.md b/examples/vision/keypointdetection/tiny_pose/rknpu2/python/README_CN.md new file mode 100644 index 000000000..1a0f37d0b --- /dev/null +++ b/examples/vision/keypointdetection/tiny_pose/rknpu2/python/README_CN.md @@ -0,0 +1,70 @@ +[English](README.md) | 简体中文 +# PP-TinyPose Python部署示例 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +本目录下提供`pptinypose_infer.py`快速完成PP-TinyPose在NPU加速部署的`单图单人关键点检测`示例。执行如下脚本即可完成 + +>> **注意**: PP-Tinypose单模型目前只支持单图单人关键点检测,因此输入的图片应只包含一个人或者进行过裁剪的图像。多人关键点检测请参考[PP-TinyPose Pipeline](../../../det_keypoint_unite/python/README.md) + +```bash +# 下载PP-TinyPose模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg + +# CPU推理 +python pptinypose_infer.py --tinypose_model_dir PP_TinyPose_256x192_infer --image hrnet_demo.jpg +``` + +运行完成可视化结果如下图所示 +
+ +
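
Before the Python API reference below, here is the core of the RKNPU2 setup in condensed form. It mirrors the `pptinypose_infer.py` added later in this patch and should be read as an illustrative sketch (note that the run command above uses the NPU via `use_rknpu()`, despite its `# CPU推理` comment):

```python
import fastdeploy as fd
import cv2

option = fd.RuntimeOption()
option.use_rknpu()  # run on the RK NPU rather than the CPU

model = fd.vision.keypointdetection.PPTinyPose(
    "PP_TinyPose_256x192_infer/PP_TinyPose_256x192_infer_rk3588_unquantized.rknn",
    "",  # the RKNN file already contains the weights, so no separate params file
    "PP_TinyPose_256x192_infer/infer_cfg.yml",
    runtime_option=option,
    model_format=fd.ModelFormat.RKNN)  # not the default ModelFormat.PADDLE

# Normalize and HWC->CHW were baked into the model during RKNN conversion,
# so they must be disabled in the FastDeploy preprocessing pipeline.
model.disable_normalize()
model.disable_permute()

result = model.predict(cv2.imread("hrnet_demo.jpg"))
print(result)
```
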
+ +## PP-TinyPose Python接口 + +```python +fd.vision.keypointdetection.PPTinyPose(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE) +``` + +PP-TinyPose模型加载和初始化,其中model_file, params_file以及config_file为训练模型导出的Paddle inference文件,具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.5/deploy/EXPORT_MODEL.md) + +**参数** + +> * **model_file**(str): 模型文件路径 +> * **params_file**(str): 参数文件路径 +> * **config_file**(str): 推理部署配置文件 +> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 +> * **model_format**(ModelFormat): 模型格式,默认为Paddle格式 + +### predict函数 + +> ```python +> PPTinyPose.predict(input_image) +> ``` +> +> 模型预测结口,输入图像直接输出检测结果。 +> +> **参数** +> +> > * **input_image**(np.ndarray): 输入数据,注意需为HWC,BGR格式 + +> **返回** +> +> > 返回`fastdeploy.vision.KeyPointDetectionResult`结构体,结构体说明参考文档[视觉模型预测结果](../../../../../docs/api/vision_results/) + +### 类成员属性 +#### 后处理参数 +用户可按照自己的实际需求,修改下列后处理参数,从而影响最终的推理和部署效果 + +> > * **use_dark**(bool): 是否使用DARK进行后处理[参考论文](https://arxiv.org/abs/1910.06278) + + +## 其它文档 + +- [PP-TinyPose 模型介绍](..) +- [PP-TinyPose C++部署](../cpp) +- [模型预测结果说明](../../../../../docs/api/vision_results/) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/keypointdetection/tiny_pose/rknpu2/python/pptinypose_infer.py b/examples/vision/keypointdetection/tiny_pose/rknpu2/python/pptinypose_infer.py new file mode 100755 index 000000000..a9e75dfa7 --- /dev/null +++ b/examples/vision/keypointdetection/tiny_pose/rknpu2/python/pptinypose_infer.py @@ -0,0 +1,50 @@ +import fastdeploy as fd +import cv2 +import os + + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--tinypose_model_dir", + required=True, + help="path of paddletinypose model directory") + parser.add_argument( + "--image", required=True, help="path of test image file.") + return parser.parse_args() + + +def build_tinypose_option(args): + option = fd.RuntimeOption() + option.use_rknpu() + return option + + +args = parse_arguments() + +tinypose_model_file = os.path.join(args.tinypose_model_dir, "PP_TinyPose_256x192_infer_rk3588_unquantized.rknn") +tinypose_params_file = os.path.join(args.tinypose_model_dir, "") +tinypose_config_file = os.path.join(args.tinypose_model_dir, "infer_cfg.yml") +# 配置runtime,加载模型 +runtime_option = build_tinypose_option(args) +tinypose_model = fd.vision.keypointdetection.PPTinyPose( + tinypose_model_file, + tinypose_params_file, + tinypose_config_file, + runtime_option=runtime_option, + model_format=fd.ModelFormat.RKNN) +tinypose_model.disable_normalize() +tinypose_model.disable_permute() + +# 预测图片检测结果 +im = cv2.imread(args.image) +tinypose_result = tinypose_model.predict(im) +print("Paddle TinyPose Result:\n", tinypose_result) + +# 预测结果可视化 +vis_im = fd.vision.vis_keypoint_detection( + im, tinypose_result, conf_threshold=0.5) +cv2.imwrite("visualized_result.jpg", vis_im) +print("TinyPose visualized result save in ./visualized_result.jpg") diff --git a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc index f4b3bfc6c..1d1ad5c3f 100644 --- a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc +++ b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc @@ -139,18 +139,12 @@ bool PPTinyPose::Postprocess(std::vector& infer_result, "Only support batch = 1 in FastDeploy now."); result->Clear(); - std::cout << "Postprocess" << std::endl; - std::cout << "infer_result.size() is " << 
infer_result.size() << std::endl; if (infer_result.size() == 1) { FDTensor result_copy = infer_result[0]; - std::cout << "Reshape result_copy!" << std::endl; result_copy.Reshape({result_copy.shape[0], result_copy.shape[1], result_copy.shape[2] * result_copy.shape[3]}); - std::cout << "Resize infer_result!" << std::endl; infer_result.resize(2); - std::cout << "Do ArgMax!" << std::endl; function::ArgMax(result_copy,&infer_result[1],-1); - std::cout << "Done!" << std::endl; } // Calculate output length From 8faca05280e2f13ada67f81a354c8068a46e11ef Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Wed, 15 Feb 2023 21:03:15 +0800 Subject: [PATCH 33/41] =?UTF-8?q?=E6=9B=B4=E6=96=B0pybind?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../vision/keypointdet/pptinypose/pptinypose_pybind.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fastdeploy/vision/keypointdet/pptinypose/pptinypose_pybind.cc b/fastdeploy/vision/keypointdet/pptinypose/pptinypose_pybind.cc index 7fc6a2aab..8f0634c85 100644 --- a/fastdeploy/vision/keypointdet/pptinypose/pptinypose_pybind.cc +++ b/fastdeploy/vision/keypointdet/pptinypose/pptinypose_pybind.cc @@ -36,6 +36,14 @@ void BindPPTinyPose(pybind11::module& m) { self.Predict(&mat, &res, detection_result); return res; }) + .def("disable_normalize", + [](vision::keypointdetection::PPTinyPose& self) { + self.DisableNormalize(); + }) + .def("disable_permute", + [](vision::keypointdetection::PPTinyPose& self) { + self.DisablePermute(); + }) .def_readwrite("use_dark", &vision::keypointdetection::PPTinyPose::use_dark); } From 2b7c23683a91932d8a20737cec2e2f5a337e073b Mon Sep 17 00:00:00 2001 From: Zheng-Bicheng Date: Thu, 16 Feb 2023 10:38:10 +0800 Subject: [PATCH 34/41] =?UTF-8?q?=E6=8C=89=E7=85=A7=E8=A6=81=E6=B1=82?= =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../vision/keypointdet/pptinypose/pptinypose.cc | 2 +- .../vision/keypointdetection/pptinypose/__init__.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc index 1d1ad5c3f..c7360c9f3 100644 --- a/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc +++ b/fastdeploy/vision/keypointdet/pptinypose/pptinypose.cc @@ -144,7 +144,7 @@ bool PPTinyPose::Postprocess(std::vector& infer_result, result_copy.Reshape({result_copy.shape[0], result_copy.shape[1], result_copy.shape[2] * result_copy.shape[3]}); infer_result.resize(2); - function::ArgMax(result_copy,&infer_result[1],-1); + function::ArgMax(result_copy, &infer_result[1], -1); } // Calculate output length diff --git a/python/fastdeploy/vision/keypointdetection/pptinypose/__init__.py b/python/fastdeploy/vision/keypointdetection/pptinypose/__init__.py index 6dfe58d98..679605a7c 100644 --- a/python/fastdeploy/vision/keypointdetection/pptinypose/__init__.py +++ b/python/fastdeploy/vision/keypointdetection/pptinypose/__init__.py @@ -71,3 +71,15 @@ class PPTinyPose(FastDeployModel): assert isinstance( value, bool), "The value to set `use_dark` must be type of bool." self._model.use_dark = value + + def disable_normalize(self): + """ + This function will disable normalize in preprocessing step. + """ + self.disable_normalize() + + def disable_permute(self): + """ + This function will disable hwc2chw in preprocessing step. 
+ """ + self.disable_permute() From ee85a3cade270c6e5c629df8f188a51f4b95d264 Mon Sep 17 00:00:00 2001 From: DefTruth <31974251+DefTruth@users.noreply.github.com> Date: Thu, 16 Feb 2023 17:16:14 +0800 Subject: [PATCH 35/41] [Benchmark] Add precision evaluation api from benchmark (#1310) * [Benchmark] Init benchmark precision api * [Benchmark] Init benchmark precision api * [Benchmark] Add benchmark precision api * [Benchmark] Calculate the statis of diff * [Benchmark] Calculate the statis of diff * [Benchmark] Calculate the statis of diff * [Benchmark] Calculate the statis of diff * [Benchmark] Calculate the statis of diff * [Benchmark] Add SplitDataLine utils * [Benchmark] Add LexSortByXY func * [Benchmark] Add LexSortByXY func * [Benchmark] Add LexSortDetectionResultByXY func * [Benchmark] Add LexSortDetectionResultByXY func * [Benchmark] Add tensor diff presicion test * [Benchmark] fixed conflicts * [Benchmark] fixed calc tensor diff * fixed build bugs * fixed ci bugs when WITH_TESTING=ON --- benchmark/cpp/CMakeLists.txt | 5 +- benchmark/cpp/benchmark_ppyolov8.cc | 0 benchmark/cpp/benchmark_precision_ppyolov8.cc | 87 +++++ benchmark/cpp/benchmark_yolov5.cc | 0 cmake/summary.cmake | 1 + fastdeploy/benchmark/utils.cc | 334 +++++++++++++++++- fastdeploy/benchmark/utils.h | 49 +++ fastdeploy/core/fd_tensor.cc | 19 - fastdeploy/utils/utils.h | 20 ++ .../vision/detection/ppdet/postprocessor.cc | 1 + fastdeploy/vision/utils/sort_det_res.cc | 64 +++- fastdeploy/vision/utils/utils.h | 11 +- .../build_android_cpp_with_benchmark.sh | 3 +- tests/CMakeLists.txt | 10 + 14 files changed, 575 insertions(+), 29 deletions(-) mode change 100755 => 100644 benchmark/cpp/CMakeLists.txt mode change 100755 => 100644 benchmark/cpp/benchmark_ppyolov8.cc create mode 100644 benchmark/cpp/benchmark_precision_ppyolov8.cc mode change 100755 => 100644 benchmark/cpp/benchmark_yolov5.cc mode change 100755 => 100644 fastdeploy/benchmark/utils.cc diff --git a/benchmark/cpp/CMakeLists.txt b/benchmark/cpp/CMakeLists.txt old mode 100755 new mode 100644 index 23e1ccf79..0fa7029db --- a/benchmark/cpp/CMakeLists.txt +++ b/benchmark/cpp/CMakeLists.txt @@ -11,13 +11,16 @@ include_directories(${FASTDEPLOY_INCS}) add_executable(benchmark_yolov5 ${PROJECT_SOURCE_DIR}/benchmark_yolov5.cc) add_executable(benchmark_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_ppyolov8.cc) add_executable(benchmark_ppcls ${PROJECT_SOURCE_DIR}/benchmark_ppcls.cc) +add_executable(benchmark_precision_ppyolov8 ${PROJECT_SOURCE_DIR}/benchmark_precision_ppyolov8.cc) if(UNIX AND (NOT APPLE) AND (NOT ANDROID)) target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread) target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags pthread) else() target_link_libraries(benchmark_yolov5 ${FASTDEPLOY_LIBS} gflags) target_link_libraries(benchmark_ppyolov8 ${FASTDEPLOY_LIBS} gflags) - target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags pthread) + target_link_libraries(benchmark_ppcls ${FASTDEPLOY_LIBS} gflags) + target_link_libraries(benchmark_precision_ppyolov8 ${FASTDEPLOY_LIBS} gflags) endif() diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc old mode 100755 new mode 100644 diff --git a/benchmark/cpp/benchmark_precision_ppyolov8.cc b/benchmark/cpp/benchmark_precision_ppyolov8.cc new file mode 100644 index 000000000..caea3be19 --- /dev/null +++ 
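
One note on the `python/fastdeploy/vision/keypointdetection/pptinypose/__init__.py` hunk in the previous patch: as added there, `disable_normalize()` and `disable_permute()` call themselves rather than the underlying C++ object, so they recurse without ever reaching the binding. They are presumably meant to forward to the pybind methods registered in `pptinypose_pybind.cc`, along these lines (a sketch of the intended wrappers, not the committed code; `self._model` is the same handle the class already uses for `use_dark`):

```python
    def disable_normalize(self):
        """
        This function will disable normalize in preprocessing step.
        """
        self._model.disable_normalize()

    def disable_permute(self):
        """
        This function will disable hwc2chw in preprocessing step.
        """
        self._model.disable_permute()
```
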
b/benchmark/cpp/benchmark_precision_ppyolov8.cc @@ -0,0 +1,87 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "flags.h" +#include "macros.h" +#include "option.h" + +namespace vision = fastdeploy::vision; +namespace benchmark = fastdeploy::benchmark; + +int main(int argc, char* argv[]) { +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) + // Initialization + auto option = fastdeploy::RuntimeOption(); + if (!CreateRuntimeOption(&option, argc, argv, true)) { + return -1; + } + auto im = cv::imread(FLAGS_image); + auto model_file = FLAGS_model + sep + "model.pdmodel"; + auto params_file = FLAGS_model + sep + "model.pdiparams"; + auto config_file = FLAGS_model + sep + "infer_cfg.yml"; + auto model_ppyolov8 = vision::detection::PaddleYOLOv8(model_file, params_file, + config_file, option); + vision::DetectionResult res; + // Run once at least + model_ppyolov8.Predict(im, &res); + // 1. Test result diff + std::cout << "=============== Test result diff =================\n"; + // Save result to -> disk. + std::string det_result_path = "ppyolov8_result.txt"; + benchmark::ResultManager::SaveDetectionResult(res, det_result_path); + // Load result from <- disk. + vision::DetectionResult res_loaded; + benchmark::ResultManager::LoadDetectionResult(&res_loaded, det_result_path); + // Calculate diff between two results. + auto det_diff = + benchmark::ResultManager::CalculateDiffStatis(&res, &res_loaded); + std::cout << "diff: mean=" << det_diff.mean << ",max=" << det_diff.max + << ",min=" << det_diff.min << std::endl; + // 2. Test tensor diff + std::cout << "=============== Test tensor diff =================\n"; + std::vector bacth_res; + std::vector input_tensors, output_tensors; + std::vector imgs; + imgs.push_back(im); + std::vector fd_images = vision::WrapMat(imgs); + + model_ppyolov8.GetPreprocessor().Run(&fd_images, &input_tensors); + input_tensors[0].name = "image"; + input_tensors[1].name = "scale_factor"; + input_tensors[2].name = "im_shape"; + input_tensors.pop_back(); + model_ppyolov8.Infer(input_tensors, &output_tensors); + model_ppyolov8.GetPostprocessor().Run(output_tensors, &bacth_res); + // Save tensor to -> disk. + auto& tensor_dump = output_tensors[0]; + std::string det_tensor_path = "ppyolov8_tensor.txt"; + benchmark::ResultManager::SaveFDTensor(tensor_dump, det_tensor_path); + // Load tensor from <- disk. + fastdeploy::FDTensor tensor_loaded; + benchmark::ResultManager::LoadFDTensor(&tensor_loaded, det_tensor_path); + // Calculate diff between two tensors. + auto det_tensor_diff = benchmark::ResultManager::CalculateDiffStatis( + &tensor_dump, &tensor_loaded); + std::cout << "diff: mean=" << det_tensor_diff.mean + << ",max=" << det_tensor_diff.max << ",min=" << det_tensor_diff.min + << std::endl; + // 3. 
Run profiling + BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res)) + auto vis_im = vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; +#endif + + return 0; +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100755 new mode 100644 diff --git a/cmake/summary.cmake b/cmake/summary.cmake index 1482539c1..6bda8fdcb 100755 --- a/cmake/summary.cmake +++ b/cmake/summary.cmake @@ -41,6 +41,7 @@ function(fastdeploy_summary) message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}") message(STATUS " ENABLE_BENCHMARK : ${ENABLE_BENCHMARK}") message(STATUS " WITH_GPU : ${WITH_GPU}") + message(STATUS " WITH_TESTING : ${WITH_TESTING}") message(STATUS " WITH_ASCEND : ${WITH_ASCEND}") message(STATUS " WITH_TIMVX : ${WITH_TIMVX}") message(STATUS " WITH_KUNLUNXIN : ${WITH_KUNLUNXIN}") diff --git a/fastdeploy/benchmark/utils.cc b/fastdeploy/benchmark/utils.cc old mode 100755 new mode 100644 index 8857f10c4..825cb5977 --- a/fastdeploy/benchmark/utils.cc +++ b/fastdeploy/benchmark/utils.cc @@ -19,10 +19,15 @@ #include #include "fastdeploy/benchmark/utils.h" +#include "fastdeploy/utils/path.h" +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) +#include "fastdeploy/vision/utils/utils.h" +#endif namespace fastdeploy { namespace benchmark { +#if defined(ENABLE_BENCHMARK) std::string Strip(const std::string& str, char ch) { int i = 0; while (str[i] == ch) { @@ -35,8 +40,7 @@ std::string Strip(const std::string& str, char ch) { return str.substr(i, j + 1 - i); } -void Split(const std::string& s, std::vector& tokens, - char delim) { +void Split(const std::string& s, std::vector& tokens, char delim) { tokens.clear(); size_t lastPos = s.find_first_not_of(delim, 0); size_t pos = s.find(delim, lastPos); @@ -146,6 +150,332 @@ std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) { #endif return result; } +#endif // ENABLE_BENCHMARK + +/// Utils for precision evaluation +#if defined(ENABLE_BENCHMARK) +static const char KEY_VALUE_SEP = '#'; +static const char VALUE_SEP = ','; + +std::vector ReadLines(const std::string& path) { + std::ifstream fin(path); + std::vector lines; + std::string line; + if (fin.is_open()) { + while (getline(fin, line)) { + lines.push_back(line); + } + } else { + FDERROR << "Failed to open file " << path << std::endl; + std::abort(); + } + fin.close(); + return lines; +} + +std::map> SplitDataLine( + const std::string& data_line) { + std::map> dict; + std::vector tokens, value_tokens; + Split(data_line, tokens, KEY_VALUE_SEP); + std::string key = tokens[0]; + std::string value = tokens[1]; + Split(value, value_tokens, VALUE_SEP); + dict[key] = value_tokens; + return dict; +} + +bool ResultManager::SaveFDTensor(const FDTensor& tensor, + const std::string& path) { + if (tensor.CpuData() == nullptr || tensor.Numel() <= 0) { + FDERROR << "Input tensor is empty!" 
<< std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + if (tensor.Dtype() != FDDataType::FP32 && + tensor.Dtype() != FDDataType::INT32 && + tensor.Dtype() != FDDataType::INT64) { + FDERROR << "Only support FP32/INT32/INT64 now, but got " + << Str(tensor.dtype) << std::endl; + return false; + } + // name + fs << "name" << KEY_VALUE_SEP << tensor.name << "\n"; + // shape + fs << "shape" << KEY_VALUE_SEP; + for (int i = 0; i < tensor.shape.size(); ++i) { + if (i < tensor.shape.size() - 1) { + fs << tensor.shape[i] << VALUE_SEP; + } else { + fs << tensor.shape[i]; + } + } + fs << "\n"; + // dtype + fs << "dtype" << KEY_VALUE_SEP << Str(tensor.dtype) << "\n"; + // data + fs << "data" << KEY_VALUE_SEP; + const void* data_ptr = tensor.CpuData(); + for (int i = 0; i < tensor.Numel(); ++i) { + if (tensor.Dtype() == FDDataType::INT64) { + if (i < tensor.Numel() - 1) { + fs << (static_cast(data_ptr))[i] << VALUE_SEP; + } else { + fs << (static_cast(data_ptr))[i]; + } + } else if (tensor.Dtype() == FDDataType::INT32) { + if (i < tensor.Numel() - 1) { + fs << (static_cast(data_ptr))[i] << VALUE_SEP; + } else { + fs << (static_cast(data_ptr))[i]; + } + } else { // FP32 + if (i < tensor.Numel() - 1) { + fs << (static_cast(data_ptr))[i] << VALUE_SEP; + } else { + fs << (static_cast(data_ptr))[i]; + } + } + } + fs << "\n"; + fs.close(); + return true; +} + +bool ResultManager::LoadFDTensor(FDTensor* tensor, const std::string& path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from" << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map> data; + // name + data = SplitDataLine(lines[0]); + tensor->name = data.begin()->first; + // shape + data = SplitDataLine(lines[1]); + tensor->shape.clear(); + for (const auto& s : data.begin()->second) { + tensor->shape.push_back(std::stol(s)); + } + // dtype + data = SplitDataLine(lines[2]); + if (data.begin()->second.at(0) == Str(FDDataType::INT64)) { + tensor->dtype = FDDataType::INT64; + } else if (data.begin()->second.at(0) == Str(FDDataType::INT32)) { + tensor->dtype = FDDataType::INT32; + } else if (data.begin()->second.at(0) == Str(FDDataType::FP32)) { + tensor->dtype = FDDataType::FP32; + } else { + FDERROR << "Only support FP32/INT64/INT32 now, but got " + << data.begin()->second.at(0) << std::endl; + return false; + } + // data + data = SplitDataLine(lines[3]); + tensor->Allocate(tensor->shape, tensor->dtype, tensor->name); + if (tensor->dtype == FDDataType::INT64) { + int64_t* mutable_data_ptr = static_cast(tensor->MutableData()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + mutable_data_ptr[i] = std::stol(data.begin()->second[i]); + } + } else if (tensor->dtype == FDDataType::INT32) { + int32_t* mutable_data_ptr = static_cast(tensor->MutableData()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + mutable_data_ptr[i] = std::stoi(data.begin()->second[i]); + } + } else { // FP32 + float* mutable_data_ptr = static_cast(tensor->MutableData()); + for (int i = 0; i < data.begin()->second.size(); ++i) { + mutable_data_ptr[i] = std::stof(data.begin()->second[i]); + } + } + return true; +} + +TensorDiff ResultManager::CalculateDiffStatis(FDTensor* lhs, FDTensor* rhs) { + if (lhs->Numel() != rhs->Numel() || lhs->Dtype() != rhs->Dtype()) { + FDASSERT(false, + "The size and dtype of input FDTensor must be equal!" 
+ " But got size %d, %d, dtype %s, %s", + lhs->Numel(), rhs->Numel(), Str(lhs->Dtype()).c_str(), + Str(rhs->Dtype()).c_str()) + } + FDDataType dtype = lhs->Dtype(); + int numel = lhs->Numel(); + if (dtype != FDDataType::FP32 && dtype != FDDataType::INT64 && + dtype != FDDataType::INT32) { + FDASSERT(false, "Only support FP32/INT64/INT32 now, but got %s", + Str(dtype).c_str()) + } + if (dtype == FDDataType::INT64) { + std::vector tensor_diff(numel); + const int64_t* lhs_data_ptr = static_cast(lhs->CpuData()); + const int64_t* rhs_data_ptr = static_cast(rhs->CpuData()); + for (int i = 0; i < numel; ++i) { + tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i]; + } + TensorDiff diff; + CalculateStatisInfo(tensor_diff.data(), numel, &(diff.mean), + &(diff.max), &(diff.min)); + return diff; + } else if (dtype == FDDataType::INT32) { + std::vector tensor_diff(numel); + const int32_t* lhs_data_ptr = static_cast(lhs->CpuData()); + const int32_t* rhs_data_ptr = static_cast(rhs->CpuData()); + for (int i = 0; i < numel; ++i) { + tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i]; + } + TensorDiff diff; + CalculateStatisInfo(tensor_diff.data(), numel, &(diff.mean), + &(diff.max), &(diff.min)); + return diff; + } else { // FP32 + std::vector tensor_diff(numel); + const float* lhs_data_ptr = static_cast(lhs->CpuData()); + const float* rhs_data_ptr = static_cast(rhs->CpuData()); + for (int i = 0; i < numel; ++i) { + tensor_diff[i] = lhs_data_ptr[i] - rhs_data_ptr[i]; + } + TensorDiff diff; + CalculateStatisInfo(tensor_diff.data(), numel, &(diff.mean), + &(diff.max), &(diff.min)); + return diff; + } +} + +#if defined(ENABLE_VISION) +bool ResultManager::SaveDetectionResult(const vision::DetectionResult& res, + const std::string& path) { + if (res.boxes.empty()) { + FDERROR << "DetectionResult can not be empty!" 
<< std::endl; + return false; + } + std::ofstream fs(path, std::ios::out); + if (!fs.is_open()) { + FDERROR << "Fail to open file:" << path << std::endl; + return false; + } + fs.precision(20); + // boxes + fs << "boxes" << KEY_VALUE_SEP; + for (int i = 0; i < res.boxes.size(); ++i) { + for (int j = 0; j < 4; ++j) { + if ((i == res.boxes.size() - 1) && (j == 3)) { + fs << res.boxes[i][j]; + } else { + fs << res.boxes[i][j] << VALUE_SEP; + } + } + } + fs << "\n"; + // scores + fs << "scores" << KEY_VALUE_SEP; + for (int i = 0; i < res.scores.size(); ++i) { + if (i < res.scores.size() - 1) { + fs << res.scores[i] << VALUE_SEP; + } else { + fs << res.scores[i]; + } + } + fs << "\n"; + // label_ids + fs << "label_ids" << KEY_VALUE_SEP; + for (int i = 0; i < res.label_ids.size(); ++i) { + if (i < res.label_ids.size() - 1) { + fs << res.label_ids[i] << VALUE_SEP; + } else { + fs << res.label_ids[i]; + } + } + fs << "\n"; + // TODO(qiuyanjun): dump masks + fs.close(); + return true; +} + +bool ResultManager::LoadDetectionResult(vision::DetectionResult* res, + const std::string& path) { + if (!CheckFileExists(path)) { + FDERROR << "Can't found file from" << path << std::endl; + return false; + } + auto lines = ReadLines(path); + std::map> data; + + // boxes + data = SplitDataLine(lines[0]); + int boxes_num = data.begin()->second.size() / 4; + res->Resize(boxes_num); + for (int i = 0; i < boxes_num; ++i) { + res->boxes[i][0] = std::stof(data.begin()->second[i * 4 + 0]); + res->boxes[i][1] = std::stof(data.begin()->second[i * 4 + 1]); + res->boxes[i][2] = std::stof(data.begin()->second[i * 4 + 2]); + res->boxes[i][3] = std::stof(data.begin()->second[i * 4 + 3]); + } + // scores + data = SplitDataLine(lines[1]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->scores[i] = std::stof(data.begin()->second[i]); + } + // label_ids + data = SplitDataLine(lines[2]); + for (int i = 0; i < data.begin()->second.size(); ++i) { + res->label_ids[i] = std::stoi(data.begin()->second[i]); + } + // TODO(qiuyanjun): load masks + return true; +} + +DetectionDiff ResultManager::CalculateDiffStatis(vision::DetectionResult* lhs, + vision::DetectionResult* rhs, + float score_threshold) { + // lex sort by x(w) & y(h) + vision::utils::LexSortDetectionResultByXY(lhs); + vision::utils::LexSortDetectionResultByXY(rhs); + // get value diff & trunc it by score_threshold + const int boxes_num = std::min(lhs->boxes.size(), rhs->boxes.size()); + std::vector boxes_diff; + std::vector scores_diff; + std::vector labels_diff; + // TODO(qiuyanjun): process the diff of masks. 
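+  // Note: both results were lex-sorted by (x, y) above, so the i-th boxes of
+  // lhs and rhs are expected to describe the same object; only pairs whose
+  // scores both exceed score_threshold contribute to the diff statistics below.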
+ for (int i = 0; i < boxes_num; ++i) { + if (lhs->scores[i] > score_threshold && rhs->scores[i] > score_threshold) { + scores_diff.push_back(lhs->scores[i] - rhs->scores[i]); + labels_diff.push_back(lhs->label_ids[i] - rhs->label_ids[i]); + boxes_diff.push_back(lhs->boxes[i][0] - rhs->boxes[i][0]); + boxes_diff.push_back(lhs->boxes[i][1] - rhs->boxes[i][1]); + boxes_diff.push_back(lhs->boxes[i][2] - rhs->boxes[i][2]); + boxes_diff.push_back(lhs->boxes[i][3] - rhs->boxes[i][3]); + } + } + FDASSERT(boxes_diff.size() > 0, + "Can't get any valid boxes while score_threshold is %f, " + "The boxes.size of lhs is %d, the boxes.size of rhs is %d", + score_threshold, lhs->boxes.size(), rhs->boxes.size()) + + DetectionDiff diff; + CalculateStatisInfo(boxes_diff.data(), boxes_diff.size(), + &(diff.boxes.mean), &(diff.boxes.max), + &(diff.boxes.min)); + CalculateStatisInfo(scores_diff.data(), scores_diff.size(), + &(diff.scores.mean), &(diff.scores.max), + &(diff.scores.min)); + CalculateStatisInfo(labels_diff.data(), labels_diff.size(), + &(diff.labels.mean), &(diff.labels.max), + &(diff.labels.min)); + diff.mean = diff.boxes.mean; + diff.max = diff.boxes.max; + diff.min = diff.boxes.min; + return diff; +} +#endif // ENABLE_VISION +#endif // ENABLE_BENCHMARK } // namespace benchmark } // namespace fastdeploy diff --git a/fastdeploy/benchmark/utils.h b/fastdeploy/benchmark/utils.h index 4037cd09c..fc7835745 100755 --- a/fastdeploy/benchmark/utils.h +++ b/fastdeploy/benchmark/utils.h @@ -16,9 +16,15 @@ #include #include // NOLINT #include "fastdeploy/utils/utils.h" +#include "fastdeploy/core/fd_tensor.h" +#if defined(ENABLE_BENCHMARK) && defined(ENABLE_VISION) +#include "fastdeploy/vision/common/result.h" +#endif namespace fastdeploy { namespace benchmark { + +#if defined(ENABLE_BENCHMARK) /*! @brief ResourceUsageMonitor object used when to collect memory info. */ class FASTDEPLOY_DECL ResourceUsageMonitor { @@ -86,5 +92,48 @@ FASTDEPLOY_DECL void Split(const std::string& s, std::vector& tokens, char delim = ' '); +/// Diff values for precision evaluation +struct FASTDEPLOY_DECL BaseDiff {}; + +struct FASTDEPLOY_DECL EvalStatis { + double mean = -1.0; + double min = -1.0; + double max = -1.0; +}; + +struct FASTDEPLOY_DECL TensorDiff: public BaseDiff, public EvalStatis {}; + +#if defined(ENABLE_VISION) +struct FASTDEPLOY_DECL DetectionDiff: public BaseDiff, public EvalStatis { + EvalStatis boxes; + EvalStatis scores; + EvalStatis labels; +}; +#endif // ENABLE_VISION +#endif // ENABLE_BENCHMARK + +/// Utils for precision evaluation +struct FASTDEPLOY_DECL ResultManager { +#if defined(ENABLE_BENCHMARK) + /// Save & Load functions for FDTensor result. + static bool SaveFDTensor(const FDTensor& tensor, const std::string& path); + static bool LoadFDTensor(FDTensor* tensor, const std::string& path); + /// Calculate diff value between two FDTensor results. + static TensorDiff CalculateDiffStatis(FDTensor* lhs, + FDTensor* rhs); +#if defined(ENABLE_VISION) + /// Save & Load functions for basic results. + static bool SaveDetectionResult(const vision::DetectionResult& res, + const std::string& path); + static bool LoadDetectionResult(vision::DetectionResult* res, + const std::string& path); + /// Calculate diff value between two basic results. 
+ static DetectionDiff CalculateDiffStatis(vision::DetectionResult* lhs, + vision::DetectionResult* rhs, + float score_threshold = 0.3f); +#endif // ENABLE_VISION +#endif // ENABLE_BENCHMARK +}; + } // namespace benchmark } // namespace fastdeploy diff --git a/fastdeploy/core/fd_tensor.cc b/fastdeploy/core/fd_tensor.cc index 8b111025d..c21caf3e9 100644 --- a/fastdeploy/core/fd_tensor.cc +++ b/fastdeploy/core/fd_tensor.cc @@ -211,25 +211,6 @@ bool FDTensor::Reshape(const std::vector& new_shape) { return true; } -template -void CalculateStatisInfo(const void* src_ptr, int size, double* mean, - double* max, double* min) { - const T* ptr = static_cast(src_ptr); - *mean = 0; - *max = -99999999; - *min = 99999999; - for (int i = 0; i < size; ++i) { - if (*(ptr + i) > *max) { - *max = *(ptr + i); - } - if (*(ptr + i) < *min) { - *min = *(ptr + i); - } - *mean += *(ptr + i); - } - *mean = *mean / size; -} - void FDTensor::PrintInfo(const std::string& prefix) const { double mean = 0; double max = -99999999; diff --git a/fastdeploy/utils/utils.h b/fastdeploy/utils/utils.h index 5b0f03e68..d44b7f187 100644 --- a/fastdeploy/utils/utils.h +++ b/fastdeploy/utils/utils.h @@ -214,4 +214,24 @@ std::string Str(const std::vector& shape) { return oss.str(); } +template +void CalculateStatisInfo(const void* src_ptr, int size, double* mean, + double* max, double* min) { + const T* ptr = static_cast(src_ptr); + *mean = static_cast(0); + *max = static_cast(-99999999); + *min = static_cast(99999999); + for (int i = 0; i < size; ++i) { + if (*(ptr + i) > *max) { + *max = *(ptr + i); + } + if (*(ptr + i) < *min) { + *min = *(ptr + i); + } + *mean += *(ptr + i); + } + *mean = *mean / size; +} + + } // namespace fastdeploy diff --git a/fastdeploy/vision/detection/ppdet/postprocessor.cc b/fastdeploy/vision/detection/ppdet/postprocessor.cc index eccfede6a..914e1cebe 100644 --- a/fastdeploy/vision/detection/ppdet/postprocessor.cc +++ b/fastdeploy/vision/detection/ppdet/postprocessor.cc @@ -82,6 +82,7 @@ bool PaddleDetPostprocessor::Run(const std::vector& tensors, const auto* data = static_cast(tensors[1].CpuData()); for (size_t i = 0; i < tensors[1].shape[0]; ++i) { num_boxes[i] = static_cast(data[i]); + total_num_boxes += num_boxes[i]; } } diff --git a/fastdeploy/vision/utils/sort_det_res.cc b/fastdeploy/vision/utils/sort_det_res.cc index 93dbb6969..dd33478a3 100644 --- a/fastdeploy/vision/utils/sort_det_res.cc +++ b/fastdeploy/vision/utils/sort_det_res.cc @@ -28,6 +28,7 @@ void Merge(DetectionResult* result, size_t low, size_t mid, size_t high) { size_t i = low; size_t j = mid + 1; size_t k = i; + // TODO(qiuyanjun): add masks process for (; i <= mid && j <= high; k++) { if (temp_scores[i] >= temp_scores[j]) { scores[k] = temp_scores[i]; @@ -70,12 +71,73 @@ void SortDetectionResult(DetectionResult* result) { size_t low = 0; size_t high = result->scores.size(); if (high == 0) { - return; + return; } high = high - 1; MergeSort(result, low, high); } +bool LexSortByXYCompare(const std::array& box_a, + const std::array& box_b) { + // WARN: The status shoule be false if (a==b). + // https://blog.csdn.net/xxxwrq/article/details/83080640 + auto is_equal = [](const float& a, const float& b) -> bool { + return std::abs(a - b) < 1e-6f; + }; + const float& x0_a = box_a[0]; + const float& y0_a = box_a[1]; + const float& x0_b = box_b[0]; + const float& y0_b = box_b[1]; + if (is_equal(x0_a, x0_b)) { + return is_equal(y0_a, y0_b) ? 
false : y0_a > y0_b; + } + return x0_a > x0_b; +} + +void ReorderDetectionResultByIndices(DetectionResult* result, + const std::vector& indices) { + // reorder boxes, scores, label_ids, masks + DetectionResult backup = (*result); // move + const bool contain_masks = backup.contain_masks; + const int boxes_num = backup.boxes.size(); + result->Clear(); + result->Resize(boxes_num); + // boxes, scores, labels_ids + for (int i = 0; i < boxes_num; ++i) { + result->boxes[i] = backup.boxes[indices[i]]; + result->scores[i] = backup.scores[indices[i]]; + result->label_ids[i] = backup.label_ids[indices[i]]; + } + if (contain_masks) { + result->contain_masks = true; + for (int i = 0; i < boxes_num; ++i) { + const auto& shape = backup.masks[indices[i]].shape; + const int mask_numel = shape[0] * shape[1]; + result->masks[i].shape = shape; + result->masks[i].Resize(mask_numel); + std::memcpy(result->masks[i].Data(), backup.masks[indices[i]].Data(), + mask_numel * sizeof(uint8_t)); + } + } +} + +void LexSortDetectionResultByXY(DetectionResult* result) { + if (result->boxes.size() == 0) { + return; + } + std::vector indices; + indices.resize(result->boxes.size()); + for (size_t i = 0; i < result->boxes.size(); ++i) { + indices[i] = i; + } + // lex sort by x(w) then y(h) + auto& boxes = result->boxes; + std::sort(indices.begin(), indices.end(), [&boxes](size_t a, size_t b) { + return LexSortByXYCompare(boxes[a], boxes[b]); + }); + ReorderDetectionResultByIndices(result, indices); +} + } // namespace utils } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/utils/utils.h b/fastdeploy/vision/utils/utils.h index 9f5106c4a..bca781973 100644 --- a/fastdeploy/vision/utils/utils.h +++ b/fastdeploy/vision/utils/utils.h @@ -64,12 +64,13 @@ void NMS(DetectionResult* output, float iou_threshold = 0.5, void NMS(FaceDetectionResult* result, float iou_threshold = 0.5); -// MergeSort -void SortDetectionResult(DetectionResult* output); +/// Sort DetectionResult/FaceDetectionResult by score +FASTDEPLOY_DECL void SortDetectionResult(DetectionResult* result); +FASTDEPLOY_DECL void SortDetectionResult(FaceDetectionResult* result); +/// Lex Sort DetectionResult/FaceDetectionResult by x(w) & y(h) axis +FASTDEPLOY_DECL void LexSortDetectionResultByXY(DetectionResult* result); -void SortDetectionResult(FaceDetectionResult* result); - -// L2 Norm / cosine similarity (for face recognition, ...) +/// L2 Norm / cosine similarity (for face recognition, ...) FASTDEPLOY_DECL std::vector L2Normalize(const std::vector& values); diff --git a/scripts/android/build_android_cpp_with_benchmark.sh b/scripts/android/build_android_cpp_with_benchmark.sh index 4a2c4084c..eb812aa64 100755 --- a/scripts/android/build_android_cpp_with_benchmark.sh +++ b/scripts/android/build_android_cpp_with_benchmark.sh @@ -92,11 +92,12 @@ __build_fastdeploy_android_shared() { -DENABLE_FLYCV=ON \ -DENABLE_TEXT=OFF \ -DENABLE_VISION=ON \ - -DBUILD_EXAMPLES=ON \ + -DBUILD_EXAMPLES=OFF \ -DENABLE_BENCHMARK=ON \ -DWITH_OPENCV_STATIC=OFF \ -DWITH_LITE_STATIC=OFF \ -DWITH_OPENMP=OFF \ + -DWITH_TESTING=OFF \ -DCMAKE_INSTALL_PREFIX=${FASDEPLOY_INSTALL_DIR} \ -Wno-dev ../../.. && make -j8 && make install diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 96674ed7a..74b7e8e52 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -62,12 +62,22 @@ function(add_fastdeploy_unittest CC_FILE) endfunction() if(WITH_TESTING) + if(ANDROID OR IOS) + # gtest in FastDeploy is not support for cross compiling now. 
+ message(FATAL_ERROR "Not support unittest for Android and IOS now.") + endif() include_directories(${CMAKE_CURRENT_SOURCE_DIR}) add_library(fastdeploy_gtest_main STATIC gtest_main) target_link_libraries(fastdeploy_gtest_main PUBLIC gtest gflags) message(STATUS "") message(STATUS "*************FastDeploy Unittest Summary**********") file(GLOB_RECURSE ALL_TEST_SRCS ${PROJECT_SOURCE_DIR}/tests/*/test_*.cc) + if(NOT ENABLE_VISION) + # vision_preprocess and release_task need vision + file(GLOB_RECURSE VISION_TEST_SRCS ${PROJECT_SOURCE_DIR}/tests/vision_preprocess/test_*.cc) + file(GLOB_RECURSE RELEASE_TEST_SRCS ${PROJECT_SOURCE_DIR}/tests/release_task/test_*.cc) + list(REMOVE_ITEM ALL_TEST_SRCS ${VISION_TEST_SRCS} ${RELEASE_TEST_SRCS}) + endif() foreach(_CC_FILE ${ALL_TEST_SRCS}) add_fastdeploy_unittest(${_CC_FILE}) endforeach() From efa46563f37b3353fa66c2ec74867468a0a7f0ad Mon Sep 17 00:00:00 2001 From: Wang Xinyu Date: Fri, 17 Feb 2023 10:27:05 +0800 Subject: [PATCH 36/41] [nvJPEG] Integrate nvJPEG decoder (#1288) * nvjpeg cmake * add common decoder, nvjpeg decoder and add image name predict api * ppclas support nvjpeg decoder * remove useless comments * image decoder support opencv * nvjpeg decode fallback to opencv * fdtensor add nbytes_allocated * single image decode api * fix bug * add pybind * ignore nvjpeg on jetson * fix cmake in * predict on fdmat * remove image names predict api, add image decoder tutorial * Update __init__.py * fix pybind --- CMakeLists.txt | 8 +- FastDeploy.cmake.in | 10 +- fastdeploy/core/fd_tensor.cc | 14 +- fastdeploy/core/fd_tensor.h | 5 + .../vision/classification/ppcls/model.cc | 40 +- .../vision/classification/ppcls/model.h | 17 + .../common/image_decoder/image_decoder.cc | 112 ++++++ .../common/image_decoder/image_decoder.h | 49 +++ .../common/image_decoder/nvjpeg_decoder.cc | 363 ++++++++++++++++++ .../common/image_decoder/nvjpeg_decoder.h | 69 ++++ .../vision/common/processors/manager.cc | 10 + fastdeploy/vision/common/processors/manager.h | 6 +- fastdeploy/vision/common/processors/mat.cc | 46 ++- fastdeploy/vision/common/processors/mat.h | 5 +- .../vision/common/processors/mat_batch.cc | 8 +- .../vision/common/processors/mat_batch.h | 4 +- .../processors/normalize_and_permute.cu | 2 +- tutorials/README.md | 6 +- tutorials/README_CN.md | 1 + tutorials/image_decoder/README.md | 16 + tutorials/image_decoder/README_CN.md | 16 + tutorials/image_decoder/cpp/CMakeLists.txt | 11 + tutorials/image_decoder/cpp/README.md | 22 ++ tutorials/image_decoder/cpp/README_CN.md | 22 ++ tutorials/image_decoder/cpp/main.cc | 57 +++ 25 files changed, 875 insertions(+), 44 deletions(-) mode change 100755 => 100644 fastdeploy/vision/classification/ppcls/model.cc create mode 100644 fastdeploy/vision/common/image_decoder/image_decoder.cc create mode 100644 fastdeploy/vision/common/image_decoder/image_decoder.h create mode 100644 fastdeploy/vision/common/image_decoder/nvjpeg_decoder.cc create mode 100644 fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h create mode 100644 tutorials/image_decoder/README.md create mode 100644 tutorials/image_decoder/README_CN.md create mode 100644 tutorials/image_decoder/cpp/CMakeLists.txt create mode 100644 tutorials/image_decoder/cpp/README.md create mode 100644 tutorials/image_decoder/cpp/README_CN.md create mode 100644 tutorials/image_decoder/cpp/main.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index fa99c6ff7..6cee4ef72 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -300,10 +300,16 @@ if(WITH_GPU) 
include_directories(${CUDA_DIRECTORY}/include) if(WIN32) find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib/x64) + add_definitions(-DENABLE_NVJPEG) else() find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64) + if(NOT BUILD_ON_JETSON) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib64) + add_definitions(-DENABLE_NVJPEG) + endif() endif() - list(APPEND DEPEND_LIBS ${CUDA_LIB}) + list(APPEND DEPEND_LIBS ${CUDA_LIB} ${NVJPEG_LIB}) # build CUDA source files in fastdeploy, CUDA source files include CUDA preprocessing, TRT plugins, etc. enable_language(CUDA) diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in index c79001c28..5c2c5b733 100644 --- a/FastDeploy.cmake.in +++ b/FastDeploy.cmake.in @@ -169,21 +169,25 @@ if(ENABLE_POROS_BACKEND) endif() if(WITH_GPU) - if (NOT CUDA_DIRECTORY) + if(NOT CUDA_DIRECTORY) set(CUDA_DIRECTORY "/usr/local/cuda") endif() if(WIN32) find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib/x64) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib/x64) else() find_library(CUDA_LIB cudart ${CUDA_DIRECTORY}/lib64) + if(NOT BUILD_ON_JETSON) + find_library(NVJPEG_LIB nvjpeg ${CUDA_DIRECTORY}/lib64) + endif() endif() if(NOT CUDA_LIB) message(FATAL_ERROR "[FastDeploy] Cannot find library cudart in ${CUDA_DIRECTORY}, Please define CUDA_DIRECTORY, e.g -DCUDA_DIRECTORY=/path/to/cuda") endif() - list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB}) + list(APPEND FASTDEPLOY_LIBS ${CUDA_LIB} ${NVJPEG_LIB}) list(APPEND FASTDEPLOY_INCS ${CUDA_DIRECTORY}/include) - if (ENABLE_TRT_BACKEND) + if(ENABLE_TRT_BACKEND) if(BUILD_ON_JETSON) find_library(TRT_INFER_LIB nvinfer /usr/lib/aarch64-linux-gnu/) find_library(TRT_ONNX_LIB nvonnxparser /usr/lib/aarch64-linux-gnu/) diff --git a/fastdeploy/core/fd_tensor.cc b/fastdeploy/core/fd_tensor.cc index c21caf3e9..8fd921043 100644 --- a/fastdeploy/core/fd_tensor.cc +++ b/fastdeploy/core/fd_tensor.cc @@ -245,12 +245,13 @@ void FDTensor::PrintInfo(const std::string& prefix) const { bool FDTensor::ReallocFn(size_t nbytes) { if (device == Device::GPU) { #ifdef WITH_GPU - size_t original_nbytes = Nbytes(); + size_t original_nbytes = nbytes_allocated; if (nbytes > original_nbytes) { if (buffer_ != nullptr) { FDDeviceFree()(buffer_); } FDDeviceAllocator()(&buffer_, nbytes); + nbytes_allocated = nbytes; } return buffer_ != nullptr; #else @@ -262,12 +263,13 @@ bool FDTensor::ReallocFn(size_t nbytes) { } else { if (is_pinned_memory) { #ifdef WITH_GPU - size_t original_nbytes = Nbytes(); + size_t original_nbytes = nbytes_allocated; if (nbytes > original_nbytes) { if (buffer_ != nullptr) { FDDeviceHostFree()(buffer_); } FDDeviceHostAllocator()(&buffer_, nbytes); + nbytes_allocated = nbytes; } return buffer_ != nullptr; #else @@ -278,6 +280,7 @@ bool FDTensor::ReallocFn(size_t nbytes) { #endif } buffer_ = realloc(buffer_, nbytes); + nbytes_allocated = nbytes; return buffer_ != nullptr; } } @@ -299,6 +302,7 @@ void FDTensor::FreeFn() { } } buffer_ = nullptr; + nbytes_allocated = 0; } } @@ -380,7 +384,7 @@ FDTensor::FDTensor(const FDTensor& other) device_id(other.device_id) { // Copy buffer if (other.buffer_ == nullptr) { - buffer_ = nullptr; + FreeFn(); } else { size_t nbytes = Nbytes(); FDASSERT(ReallocFn(nbytes), @@ -396,7 +400,8 @@ FDTensor::FDTensor(FDTensor&& other) dtype(other.dtype), external_data_ptr(other.external_data_ptr), device(other.device), - device_id(other.device_id) { + device_id(other.device_id), + nbytes_allocated(other.nbytes_allocated) { other.name = ""; // Note(zhoushunjie): Avoid 
double free. other.buffer_ = nullptr; @@ -435,6 +440,7 @@ FDTensor& FDTensor::operator=(FDTensor&& other) { dtype = other.dtype; device = other.device; device_id = other.device_id; + nbytes_allocated = other.nbytes_allocated; other.name = ""; // Note(zhoushunjie): Avoid double free. diff --git a/fastdeploy/core/fd_tensor.h b/fastdeploy/core/fd_tensor.h index 5584f1b30..95a603dd8 100644 --- a/fastdeploy/core/fd_tensor.h +++ b/fastdeploy/core/fd_tensor.h @@ -54,6 +54,11 @@ struct FASTDEPLOY_DECL FDTensor { // other devices' data std::vector temporary_cpu_buffer; + // The number of bytes allocated so far. + // When resizing GPU memory, we will free and realloc the memory only if the + // required size is larger than this value. + size_t nbytes_allocated = 0; + // Get data buffer pointer void* MutableData(); diff --git a/fastdeploy/vision/classification/ppcls/model.cc b/fastdeploy/vision/classification/ppcls/model.cc old mode 100755 new mode 100644 index 6868c9c62..d52eeace9 --- a/fastdeploy/vision/classification/ppcls/model.cc +++ b/fastdeploy/vision/classification/ppcls/model.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "fastdeploy/vision/classification/ppcls/model.h" + #include "fastdeploy/utils/unique_ptr.h" namespace fastdeploy { @@ -23,7 +24,8 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file, const std::string& params_file, const std::string& config_file, const RuntimeOption& custom_option, - const ModelFormat& model_format) : preprocessor_(config_file) { + const ModelFormat& model_format) + : preprocessor_(config_file) { if (model_format == ModelFormat::PADDLE) { valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, Backend::LITE}; @@ -32,15 +34,14 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file, valid_ascend_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE}; valid_ipu_backends = {Backend::PDINFER}; - }else if (model_format == ModelFormat::SOPHGO) { + } else if (model_format == ModelFormat::SOPHGO) { valid_sophgonpu_backends = {Backend::SOPHGOTPU}; - } - else { + } else { valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; valid_rknpu_backends = {Backend::RKNPU2}; } - + runtime_option = custom_option; runtime_option.model_format = model_format; runtime_option.model_file = model_file; @@ -48,8 +49,9 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file, initialized = Initialize(); } -std::unique_ptr PaddleClasModel::Clone() const { - std::unique_ptr clone_model = utils::make_unique(PaddleClasModel(*this)); +std::unique_ptr PaddleClasModel::Clone() const { + std::unique_ptr clone_model = + utils::make_unique(PaddleClasModel(*this)); clone_model->SetRuntime(clone_model->CloneRuntime()); return clone_model; } @@ -71,17 +73,30 @@ bool PaddleClasModel::Predict(cv::Mat* im, ClassifyResult* result, int topk) { } bool PaddleClasModel::Predict(const cv::Mat& im, ClassifyResult* result) { + FDMat mat = WrapMat(im); + return Predict(mat, result); +} + +bool PaddleClasModel::BatchPredict(const std::vector& images, + std::vector* results) { + std::vector mats = WrapMat(images); + return BatchPredict(mats, results); +} + +bool PaddleClasModel::Predict(const FDMat& mat, ClassifyResult* result) { std::vector results; - if (!BatchPredict({im}, &results)) { + std::vector mats = {mat}; + if (!BatchPredict(mats, &results)) { return false; } *result = std::move(results[0]); return true; } -bool PaddleClasModel::BatchPredict(const std::vector& images, 
std::vector* results) { - std::vector fd_images = WrapMat(images); - if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) { +bool PaddleClasModel::BatchPredict(const std::vector& mats, + std::vector* results) { + std::vector fd_mats = mats; + if (!preprocessor_.Run(&fd_mats, &reused_input_tensors_)) { FDERROR << "Failed to preprocess the input image." << std::endl; return false; } @@ -92,7 +107,8 @@ bool PaddleClasModel::BatchPredict(const std::vector& images, std::vect } if (!postprocessor_.Run(reused_output_tensors_, results)) { - FDERROR << "Failed to postprocess the inference results by runtime." << std::endl; + FDERROR << "Failed to postprocess the inference results by runtime." + << std::endl; return false; } diff --git a/fastdeploy/vision/classification/ppcls/model.h b/fastdeploy/vision/classification/ppcls/model.h index 5971147fb..b5ef8a60c 100644 --- a/fastdeploy/vision/classification/ppcls/model.h +++ b/fastdeploy/vision/classification/ppcls/model.h @@ -75,6 +75,23 @@ class FASTDEPLOY_DECL PaddleClasModel : public FastDeployModel { virtual bool BatchPredict(const std::vector& imgs, std::vector* results); + /** \brief Predict the classification result for an input image + * + * \param[in] mat The input mat + * \param[in] result The output classification result + * \return true if the prediction successed, otherwise false + */ + virtual bool Predict(const FDMat& mat, ClassifyResult* result); + + /** \brief Predict the classification results for a batch of input images + * + * \param[in] mats, The input mat list + * \param[in] results The output classification result list + * \return true if the prediction successed, otherwise false + */ + virtual bool BatchPredict(const std::vector& mats, + std::vector* results); + /// Get preprocessor reference of PaddleClasModel virtual PaddleClasPreprocessor& GetPreprocessor() { return preprocessor_; diff --git a/fastdeploy/vision/common/image_decoder/image_decoder.cc b/fastdeploy/vision/common/image_decoder/image_decoder.cc new file mode 100644 index 000000000..085d234c2 --- /dev/null +++ b/fastdeploy/vision/common/image_decoder/image_decoder.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
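+// A minimal usage sketch (hypothetical image path; OPENCV is the default
+// backend, and the NVJPEG backend is only usable when built with ENABLE_NVJPEG):
+//   fastdeploy::vision::ImageDecoder decoder;  // defaults to ImageDecoderLib::OPENCV
+//   fastdeploy::vision::FDMat mat;
+//   decoder.Decode("test.jpg", &mat);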
+ +#include "fastdeploy/vision/common/image_decoder/image_decoder.h" + +#include "opencv2/imgcodecs.hpp" + +namespace fastdeploy { +namespace vision { + +ImageDecoder::ImageDecoder(ImageDecoderLib lib) { + if (lib == ImageDecoderLib::NVJPEG) { +#ifdef ENABLE_NVJPEG + nvjpeg::init_decoder(nvjpeg_params_); +#endif + } + lib_ = lib; +} + +ImageDecoder::~ImageDecoder() { + if (lib_ == ImageDecoderLib::NVJPEG) { +#ifdef ENABLE_NVJPEG + nvjpeg::destroy_decoder(nvjpeg_params_); +#endif + } +} + +bool ImageDecoder::Decode(const std::string& img_name, FDMat* mat) { + std::vector mats(1); + mats[0] = std::move(*mat); + if (!BatchDecode({img_name}, &mats)) { + return false; + } + *mat = std::move(mats[0]); + return true; +} + +bool ImageDecoder::BatchDecode(const std::vector& img_names, + std::vector* mats) { + if (lib_ == ImageDecoderLib::OPENCV) { + return ImplByOpenCV(img_names, mats); + } else if (lib_ == ImageDecoderLib::NVJPEG) { + return ImplByNvJpeg(img_names, mats); + } + return true; +} + +bool ImageDecoder::ImplByOpenCV(const std::vector& img_names, + std::vector* mats) { + for (size_t i = 0; i < img_names.size(); ++i) { + cv::Mat im = cv::imread(img_names[i]); + (*mats)[i].SetMat(im); + (*mats)[i].layout = Layout::HWC; + (*mats)[i].SetWidth(im.cols); + (*mats)[i].SetHeight(im.rows); + (*mats)[i].SetChannels(im.channels()); + } + return true; +} + +bool ImageDecoder::ImplByNvJpeg(const std::vector& img_names, + std::vector* mats) { +#ifdef ENABLE_NVJPEG + nvjpeg_params_.batch_size = img_names.size(); + std::vector output_imgs(nvjpeg_params_.batch_size); + std::vector widths(nvjpeg_params_.batch_size); + std::vector heights(nvjpeg_params_.batch_size); + // TODO(wangxinyu): support other output format + nvjpeg_params_.fmt = NVJPEG_OUTPUT_BGRI; + double total; + nvjpeg_params_.stream = (*mats)[0].Stream(); + + std::vector output_buffers; + for (size_t i = 0; i < mats->size(); ++i) { + FDASSERT((*mats)[i].output_cache != nullptr, + "The output_cache of FDMat was not set."); + output_buffers.push_back((*mats)[i].output_cache); + } + + if (nvjpeg::process_images(img_names, nvjpeg_params_, total, output_imgs, + output_buffers, widths, heights)) { + // If nvJPEG decode failed, will fallback to OpenCV, + // e.g. png format is not supported by nvJPEG + FDWARNING << "nvJPEG decode failed, falling back to OpenCV for this batch" + << std::endl; + return ImplByOpenCV(img_names, mats); + } + + for (size_t i = 0; i < mats->size(); ++i) { + (*mats)[i].mat_type = ProcLib::CUDA; + (*mats)[i].layout = Layout::HWC; + (*mats)[i].SetTensor(output_buffers[i]); + } +#else + FDASSERT(false, "FastDeploy didn't compile with NVJPEG."); +#endif + return true; +} + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/image_decoder/image_decoder.h b/fastdeploy/vision/common/image_decoder/image_decoder.h new file mode 100644 index 000000000..7c7b2d297 --- /dev/null +++ b/fastdeploy/vision/common/image_decoder/image_decoder.h @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/utils/utils.h" +#include "fastdeploy/vision/common/processors/mat.h" +#include "fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h" + +namespace fastdeploy { +namespace vision { + +enum class FASTDEPLOY_DECL ImageDecoderLib { OPENCV, NVJPEG }; + +class FASTDEPLOY_DECL ImageDecoder { + public: + explicit ImageDecoder(ImageDecoderLib lib = ImageDecoderLib::OPENCV); + + ~ImageDecoder(); + + bool Decode(const std::string& img_name, FDMat* mat); + + bool BatchDecode(const std::vector& img_names, + std::vector* mats); + + private: + bool ImplByOpenCV(const std::vector& img_names, + std::vector* mats); + bool ImplByNvJpeg(const std::vector& img_names, + std::vector* mats); + ImageDecoderLib lib_ = ImageDecoderLib::OPENCV; +#ifdef ENABLE_NVJPEG + nvjpeg::decode_params_t nvjpeg_params_; +#endif +}; + +} // namespace vision +} // namespace fastdeploy diff --git a/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.cc b/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.cc new file mode 100644 index 000000000..ad121ee3f --- /dev/null +++ b/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.cc @@ -0,0 +1,363 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Part of the following code in this file refs to +// https://github.com/CVCUDA/CV-CUDA/blob/release_v0.2.x/samples/common/NvDecoder.cpp +// +// Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Licensed under the Apache-2.0 license +// \brief +// \author NVIDIA + +#ifdef ENABLE_NVJPEG +#include "fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h" + +namespace fastdeploy { +namespace vision { +namespace nvjpeg { + +#define CHECK_CUDA(call) \ + { \ + cudaError_t _e = (call); \ + if (_e != cudaSuccess) { \ + std::cout << "CUDA Runtime failure: '#" << _e << "' at " << __FILE__ \ + << ":" << __LINE__ << std::endl; \ + exit(1); \ + } \ + } + +#define CHECK_NVJPEG(call) \ + { \ + nvjpegStatus_t _e = (call); \ + if (_e != NVJPEG_STATUS_SUCCESS) { \ + std::cout << "NVJPEG failure: '#" << _e << "' at " << __FILE__ << ":" \ + << __LINE__ << std::endl; \ + exit(1); \ + } \ + } + +static int dev_malloc(void** p, size_t s) { return (int)cudaMalloc(p, s); } + +static int dev_free(void* p) { return (int)cudaFree(p); } + +static int host_malloc(void** p, size_t s, unsigned int f) { + return (int)cudaHostAlloc(p, s, f); +} + +static int host_free(void* p) { return (int)cudaFreeHost(p); } + +static int read_images(const FileNames& image_names, FileData& raw_data, + std::vector& raw_len) { + for (size_t i = 0; i < image_names.size(); ++i) { + if (image_names.size() == 0) { + std::cerr << "No valid images left in the input list, exit" << std::endl; + return EXIT_FAILURE; + } + + // Read an image from disk. 
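+    // (std::ios::ate opens the stream at end-of-file, so tellg() below
+    // returns the file size before seeking back to the beginning.)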
+ std::ifstream input(image_names[i].c_str(), + std::ios::in | std::ios::binary | std::ios::ate); + if (!(input.is_open())) { + std::cerr << "Cannot open image: " << image_names[i] << std::endl; + FDASSERT(false, "Read file error."); + continue; + } + + // Get the size + long unsigned int file_size = input.tellg(); + input.seekg(0, std::ios::beg); + // resize if buffer is too small + if (raw_data[i].size() < file_size) { + raw_data[i].resize(file_size); + } + if (!input.read(raw_data[i].data(), file_size)) { + std::cerr << "Cannot read from file: " << image_names[i] << std::endl; + // image_names.erase(cur_iter); + FDASSERT(false, "Read file error."); + continue; + } + raw_len[i] = file_size; + } + return EXIT_SUCCESS; +} + +// prepare buffers for RGBi output format +static int prepare_buffers(FileData& file_data, std::vector& file_len, + std::vector& img_width, + std::vector& img_height, + std::vector& ibuf, + std::vector& isz, + std::vector& output_buffers, + const FileNames& current_names, + decode_params_t& params) { + int widths[NVJPEG_MAX_COMPONENT]; + int heights[NVJPEG_MAX_COMPONENT]; + int channels; + nvjpegChromaSubsampling_t subsampling; + + for (long unsigned int i = 0; i < file_data.size(); i++) { + nvjpegStatus_t status = nvjpegGetImageInfo( + params.nvjpeg_handle, (unsigned char*)file_data[i].data(), file_len[i], + &channels, &subsampling, widths, heights); + if (status != NVJPEG_STATUS_SUCCESS) { + std::cout << "NVJPEG failure: #" << status << " in nvjpegGetImageInfo." + << std::endl; + return EXIT_FAILURE; + } + + img_width[i] = widths[0]; + img_height[i] = heights[0]; + + int mul = 1; + // in the case of interleaved RGB output, write only to single channel, but + // 3 samples at once + if (params.fmt == NVJPEG_OUTPUT_RGBI || params.fmt == NVJPEG_OUTPUT_BGRI) { + channels = 1; + mul = 3; + } else if (params.fmt == NVJPEG_OUTPUT_RGB || + params.fmt == NVJPEG_OUTPUT_BGR) { + // in the case of rgb create 3 buffers with sizes of original image + channels = 3; + widths[1] = widths[2] = widths[0]; + heights[1] = heights[2] = heights[0]; + } else { + FDASSERT(false, "Unsupport NVJPEG output format: %d", params.fmt); + } + + output_buffers[i]->Resize({heights[0], widths[0], mul * channels}, + FDDataType::UINT8, "output_cache", Device::GPU); + + uint8_t* cur_buffer = reinterpret_cast(output_buffers[i]->Data()); + + // realloc output buffer if required + for (int c = 0; c < channels; c++) { + int aw = mul * widths[c]; + int ah = heights[c]; + size_t sz = aw * ah; + ibuf[i].pitch[c] = aw; + if (sz > isz[i].pitch[c]) { + ibuf[i].channel[c] = cur_buffer; + cur_buffer = cur_buffer + sz; + isz[i].pitch[c] = sz; + } + } + } + return EXIT_SUCCESS; +} + +static void create_decoupled_api_handles(decode_params_t& params) { + CHECK_NVJPEG(nvjpegDecoderCreate(params.nvjpeg_handle, NVJPEG_BACKEND_DEFAULT, + ¶ms.nvjpeg_decoder)); + CHECK_NVJPEG(nvjpegDecoderStateCreate(params.nvjpeg_handle, + params.nvjpeg_decoder, + ¶ms.nvjpeg_decoupled_state)); + + CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL, + ¶ms.pinned_buffers[0])); + CHECK_NVJPEG(nvjpegBufferPinnedCreate(params.nvjpeg_handle, NULL, + ¶ms.pinned_buffers[1])); + CHECK_NVJPEG(nvjpegBufferDeviceCreate(params.nvjpeg_handle, NULL, + ¶ms.device_buffer)); + + CHECK_NVJPEG( + nvjpegJpegStreamCreate(params.nvjpeg_handle, ¶ms.jpeg_streams[0])); + CHECK_NVJPEG( + nvjpegJpegStreamCreate(params.nvjpeg_handle, ¶ms.jpeg_streams[1])); + + CHECK_NVJPEG(nvjpegDecodeParamsCreate(params.nvjpeg_handle, + ¶ms.nvjpeg_decode_params)); +} + 
+static void destroy_decoupled_api_handles(decode_params_t& params) { + CHECK_NVJPEG(nvjpegDecodeParamsDestroy(params.nvjpeg_decode_params)); + CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[0])); + CHECK_NVJPEG(nvjpegJpegStreamDestroy(params.jpeg_streams[1])); + CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[0])); + CHECK_NVJPEG(nvjpegBufferPinnedDestroy(params.pinned_buffers[1])); + CHECK_NVJPEG(nvjpegBufferDeviceDestroy(params.device_buffer)); + CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_decoupled_state)); + CHECK_NVJPEG(nvjpegDecoderDestroy(params.nvjpeg_decoder)); +} + +int decode_images(const FileData& img_data, const std::vector& img_len, + std::vector& out, decode_params_t& params, + double& time) { + CHECK_CUDA(cudaStreamSynchronize(params.stream)); + + std::vector batched_bitstreams; + std::vector batched_bitstreams_size; + std::vector batched_output; + + // bit-streams that batched decode cannot handle + std::vector otherdecode_bitstreams; + std::vector otherdecode_bitstreams_size; + std::vector otherdecode_output; + + if (params.hw_decode_available) { + for (int i = 0; i < params.batch_size; i++) { + // extract bitstream meta data to figure out whether a bit-stream can be + // decoded + nvjpegJpegStreamParseHeader(params.nvjpeg_handle, + (const unsigned char*)img_data[i].data(), + img_len[i], params.jpeg_streams[0]); + int isSupported = -1; + nvjpegDecodeBatchedSupported(params.nvjpeg_handle, params.jpeg_streams[0], + &isSupported); + + if (isSupported == 0) { + batched_bitstreams.push_back((const unsigned char*)img_data[i].data()); + batched_bitstreams_size.push_back(img_len[i]); + batched_output.push_back(out[i]); + } else { + otherdecode_bitstreams.push_back( + (const unsigned char*)img_data[i].data()); + otherdecode_bitstreams_size.push_back(img_len[i]); + otherdecode_output.push_back(out[i]); + } + } + } else { + for (int i = 0; i < params.batch_size; i++) { + otherdecode_bitstreams.push_back( + (const unsigned char*)img_data[i].data()); + otherdecode_bitstreams_size.push_back(img_len[i]); + otherdecode_output.push_back(out[i]); + } + } + + if (batched_bitstreams.size() > 0) { + CHECK_NVJPEG(nvjpegDecodeBatchedInitialize( + params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.size(), 1, + params.fmt)); + + CHECK_NVJPEG(nvjpegDecodeBatched( + params.nvjpeg_handle, params.nvjpeg_state, batched_bitstreams.data(), + batched_bitstreams_size.data(), batched_output.data(), params.stream)); + } + + if (otherdecode_bitstreams.size() > 0) { + CHECK_NVJPEG(nvjpegStateAttachDeviceBuffer(params.nvjpeg_decoupled_state, + params.device_buffer)); + int buffer_index = 0; + CHECK_NVJPEG(nvjpegDecodeParamsSetOutputFormat(params.nvjpeg_decode_params, + params.fmt)); + for (int i = 0; i < params.batch_size; i++) { + CHECK_NVJPEG(nvjpegJpegStreamParse(params.nvjpeg_handle, + otherdecode_bitstreams[i], + otherdecode_bitstreams_size[i], 0, 0, + params.jpeg_streams[buffer_index])); + + CHECK_NVJPEG(nvjpegStateAttachPinnedBuffer( + params.nvjpeg_decoupled_state, params.pinned_buffers[buffer_index])); + + CHECK_NVJPEG(nvjpegDecodeJpegHost( + params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, params.nvjpeg_decode_params, + params.jpeg_streams[buffer_index])); + + CHECK_CUDA(cudaStreamSynchronize(params.stream)); + + CHECK_NVJPEG(nvjpegDecodeJpegTransferToDevice( + params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, params.jpeg_streams[buffer_index], + params.stream)); + + buffer_index = 1 - buffer_index; // 
switch pinned buffer in pipeline mode + // to avoid an extra sync + + CHECK_NVJPEG( + nvjpegDecodeJpegDevice(params.nvjpeg_handle, params.nvjpeg_decoder, + params.nvjpeg_decoupled_state, + &otherdecode_output[i], params.stream)); + } + } + return EXIT_SUCCESS; +} + +double process_images(const FileNames& image_names, decode_params_t& params, + double& total, std::vector& iout, + std::vector& output_buffers, + std::vector& widths, std::vector& heights) { + FDASSERT(image_names.size() == params.batch_size, + "Number of images and batch size must be equal."); + // vector for storing raw files and file lengths + FileData file_data(params.batch_size); + std::vector file_len(params.batch_size); + FileNames current_names(params.batch_size); + // we wrap over image files to process total_images of files + auto file_iter = image_names.begin(); + + // output buffer sizes, for convenience + std::vector isz(params.batch_size); + + for (long unsigned int i = 0; i < iout.size(); i++) { + for (int c = 0; c < NVJPEG_MAX_COMPONENT; c++) { + iout[i].channel[c] = NULL; + iout[i].pitch[c] = 0; + isz[i].pitch[c] = 0; + } + } + + if (read_images(image_names, file_data, file_len)) { + return EXIT_FAILURE; + } + + if (prepare_buffers(file_data, file_len, widths, heights, iout, isz, + output_buffers, image_names, params)) { + return EXIT_FAILURE; + } + + double time; + if (decode_images(file_data, file_len, iout, params, time)) { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +void init_decoder(decode_params_t& params) { + params.hw_decode_available = true; + nvjpegDevAllocator_t dev_allocator = {&dev_malloc, &dev_free}; + nvjpegPinnedAllocator_t pinned_allocator = {&host_malloc, &host_free}; + nvjpegStatus_t status = + nvjpegCreateEx(NVJPEG_BACKEND_HARDWARE, &dev_allocator, &pinned_allocator, + NVJPEG_FLAGS_DEFAULT, ¶ms.nvjpeg_handle); + if (status == NVJPEG_STATUS_ARCH_MISMATCH) { + std::cout << "Hardware Decoder not supported. " + "Falling back to default backend" + << std::endl; + CHECK_NVJPEG(nvjpegCreateEx(NVJPEG_BACKEND_DEFAULT, &dev_allocator, + &pinned_allocator, NVJPEG_FLAGS_DEFAULT, + ¶ms.nvjpeg_handle)); + params.hw_decode_available = false; + } else { + CHECK_NVJPEG(status); + } + + CHECK_NVJPEG( + nvjpegJpegStateCreate(params.nvjpeg_handle, ¶ms.nvjpeg_state)); + + create_decoupled_api_handles(params); +} + +void destroy_decoder(decode_params_t& params) { + destroy_decoupled_api_handles(params); + CHECK_NVJPEG(nvjpegJpegStateDestroy(params.nvjpeg_state)); + CHECK_NVJPEG(nvjpegDestroy(params.nvjpeg_handle)); +} + +} // namespace nvjpeg +} // namespace vision +} // namespace fastdeploy + +#endif // ENABLE_NVJPEG diff --git a/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h b/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h new file mode 100644 index 000000000..65307ced8 --- /dev/null +++ b/fastdeploy/vision/common/image_decoder/nvjpeg_decoder.h @@ -0,0 +1,69 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Part of the following code in this file refs to +// https://github.com/CVCUDA/CV-CUDA/blob/release_v0.2.x/samples/common/NvDecoder.h +// +// Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Licensed under the Apache-2.0 license +// \brief +// \author NVIDIA + +#pragma once + +#ifdef ENABLE_NVJPEG +#include "fastdeploy/core/fd_tensor.h" + +#include +#include + + +namespace fastdeploy { +namespace vision { +namespace nvjpeg { + +typedef std::vector FileNames; +typedef std::vector> FileData; + +struct decode_params_t { + int batch_size; + nvjpegJpegState_t nvjpeg_state; + nvjpegHandle_t nvjpeg_handle; + cudaStream_t stream; + + // used with decoupled API + nvjpegJpegState_t nvjpeg_decoupled_state; + nvjpegBufferPinned_t pinned_buffers[2]; // 2 buffers for pipelining + nvjpegBufferDevice_t device_buffer; + nvjpegJpegStream_t jpeg_streams[2]; // 2 streams for pipelining + nvjpegDecodeParams_t nvjpeg_decode_params; + nvjpegJpegDecoder_t nvjpeg_decoder; + + nvjpegOutputFormat_t fmt; + bool hw_decode_available; +}; + +void init_decoder(decode_params_t& params); +void destroy_decoder(decode_params_t& params); + +double process_images(const FileNames& image_names, decode_params_t& params, + double& total, std::vector& iout, + std::vector& output_buffers, + std::vector& widths, std::vector& heights); + +} // namespace nvjpeg +} // namespace vision +} // namespace fastdeploy + +#endif // ENABLE_NVJPEG diff --git a/fastdeploy/vision/common/processors/manager.cc b/fastdeploy/vision/common/processors/manager.cc index 45b29866b..070354da1 100644 --- a/fastdeploy/vision/common/processors/manager.cc +++ b/fastdeploy/vision/common/processors/manager.cc @@ -77,6 +77,16 @@ bool ProcessorManager::Run(std::vector* images, } (*images)[i].input_cache = &input_caches_[i]; (*images)[i].output_cache = &output_caches_[i]; + if ((*images)[i].mat_type == ProcLib::CUDA) { + // Make a copy of the input data ptr, so that the original data ptr of + // FDMat won't be modified. 
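+      // SetExternalData only wraps the existing device pointer; no image
+      // data is copied here.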
+ auto fd_tensor = std::make_shared(); + fd_tensor->SetExternalData( + (*images)[i].Tensor()->shape, (*images)[i].Tensor()->Dtype(), + (*images)[i].Tensor()->Data(), (*images)[i].Tensor()->device, + (*images)[i].Tensor()->device_id); + (*images)[i].SetTensor(fd_tensor); + } } bool ret = Apply(&image_batch, outputs); diff --git a/fastdeploy/vision/common/processors/manager.h b/fastdeploy/vision/common/processors/manager.h index 6c119ff56..48b5575c4 100644 --- a/fastdeploy/vision/common/processors/manager.h +++ b/fastdeploy/vision/common/processors/manager.h @@ -35,6 +35,10 @@ class FASTDEPLOY_DECL ProcessorManager { bool CudaUsed(); +#ifdef WITH_GPU + cudaStream_t Stream() const { return stream_; } +#endif + void SetStream(FDMat* mat) { #ifdef WITH_GPU mat->SetStream(stream_); @@ -56,7 +60,7 @@ class FASTDEPLOY_DECL ProcessorManager { int DeviceId() { return device_id_; } - /** \brief Process the input image and prepare input tensors for runtime + /** \brief Process the input images and prepare input tensors for runtime * * \param[in] images The input image data list, all the elements are returned by cv::imread() * \param[in] outputs The output tensors which will feed in runtime diff --git a/fastdeploy/vision/common/processors/mat.cc b/fastdeploy/vision/common/processors/mat.cc index f56d0b585..b78f57436 100644 --- a/fastdeploy/vision/common/processors/mat.cc +++ b/fastdeploy/vision/common/processors/mat.cc @@ -37,7 +37,7 @@ cv::Mat* Mat::GetOpenCVMat() { #ifdef WITH_GPU FDASSERT(cudaStreamSynchronize(stream) == cudaSuccess, "[ERROR] Error occurs while sync cuda stream."); - cpu_mat = CreateZeroCopyOpenCVMatFromTensor(fd_tensor); + cpu_mat = CreateZeroCopyOpenCVMatFromTensor(*fd_tensor); mat_type = ProcLib::OPENCV; device = Device::CPU; return &cpu_mat; @@ -59,29 +59,53 @@ void* Mat::Data() { "fcv::Mat."); #endif } else if (device == Device::GPU) { - return fd_tensor.Data(); + return fd_tensor->Data(); } return cpu_mat.ptr(); } FDTensor* Mat::Tensor() { if (mat_type == ProcLib::OPENCV) { - ShareWithTensor(&fd_tensor); + ShareWithTensor(fd_tensor.get()); } else if (mat_type == ProcLib::FLYCV) { #ifdef ENABLE_FLYCV cpu_mat = ConvertFlyCVMatToOpenCV(fcv_mat); mat_type = ProcLib::OPENCV; - ShareWithTensor(&fd_tensor); + ShareWithTensor(fd_tensor.get()); #else FDASSERT(false, "FastDeploy didn't compiled with FlyCV!"); #endif } - return &fd_tensor; + return fd_tensor.get(); } void Mat::SetTensor(FDTensor* tensor) { - fd_tensor.SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), - tensor->device, tensor->device_id); + fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), + tensor->device, tensor->device_id); + device = tensor->device; + if (layout == Layout::HWC) { + height = tensor->Shape()[0]; + width = tensor->Shape()[1]; + channels = tensor->Shape()[2]; + } else if (layout == Layout::CHW) { + channels = tensor->Shape()[0]; + height = tensor->Shape()[1]; + width = tensor->Shape()[2]; + } +} + +void Mat::SetTensor(std::shared_ptr& tensor) { + fd_tensor = tensor; + device = tensor->device; + if (layout == Layout::HWC) { + height = tensor->Shape()[0]; + width = tensor->Shape()[1]; + channels = tensor->Shape()[2]; + } else if (layout == Layout::CHW) { + channels = tensor->Shape()[0]; + height = tensor->Shape()[1]; + width = tensor->Shape()[2]; + } } void Mat::ShareWithTensor(FDTensor* tensor) { @@ -134,7 +158,7 @@ void Mat::PrintInfo(const std::string& flag) { #ifdef WITH_GPU FDASSERT(cudaStreamSynchronize(stream) == cudaSuccess, "[ERROR] Error occurs while sync 
cuda stream."); - cv::Mat tmp_mat = CreateZeroCopyOpenCVMatFromTensor(fd_tensor); + cv::Mat tmp_mat = CreateZeroCopyOpenCVMatFromTensor(*fd_tensor); cv::Scalar mean = cv::mean(tmp_mat); for (int i = 0; i < Channels(); ++i) { std::cout << mean[i] << " "; @@ -157,7 +181,7 @@ FDDataType Mat::Type() { "fcv::Mat."); #endif } else if (mat_type == ProcLib::CUDA || mat_type == ProcLib::CVCUDA) { - return fd_tensor.Dtype(); + return fd_tensor->Dtype(); } return OpenCVDataTypeToFD(cpu_mat.type()); } @@ -262,6 +286,10 @@ FDTensor* CreateCachedGpuInputTensor(Mat* mat) { #ifdef WITH_GPU FDTensor* src = mat->Tensor(); if (src->device == Device::GPU) { + if (src->Data() == mat->output_cache->Data()) { + std::swap(mat->input_cache, mat->output_cache); + std::swap(mat->input_cache->name, mat->output_cache->name); + } return src; } else if (src->device == Device::CPU) { // Mats on CPU, we need copy these tensors from CPU to GPU diff --git a/fastdeploy/vision/common/processors/mat.h b/fastdeploy/vision/common/processors/mat.h index c29fdd4b2..13ae76abd 100644 --- a/fastdeploy/vision/common/processors/mat.h +++ b/fastdeploy/vision/common/processors/mat.h @@ -49,7 +49,6 @@ struct FASTDEPLOY_DECL Mat { #endif Mat(const Mat& mat) = default; - // Move assignment Mat& operator=(const Mat& mat) = default; // Move constructor @@ -96,6 +95,8 @@ struct FASTDEPLOY_DECL Mat { // Set fd_tensor void SetTensor(FDTensor* tensor); + void SetTensor(std::shared_ptr& tensor); + private: int channels; int height; @@ -109,7 +110,7 @@ struct FASTDEPLOY_DECL Mat { #endif // Currently, fd_tensor is only used by CUDA and CV-CUDA, // OpenCV and FlyCV are not using it. - FDTensor fd_tensor; + std::shared_ptr fd_tensor = std::make_shared(); public: FDDataType Type(); diff --git a/fastdeploy/vision/common/processors/mat_batch.cc b/fastdeploy/vision/common/processors/mat_batch.cc index b73703588..f625d6d4d 100644 --- a/fastdeploy/vision/common/processors/mat_batch.cc +++ b/fastdeploy/vision/common/processors/mat_batch.cc @@ -27,7 +27,7 @@ void FDMatBatch::SetStream(cudaStream_t s) { FDTensor* FDMatBatch::Tensor() { if (has_batched_tensor) { - return &fd_tensor; + return fd_tensor.get(); } FDASSERT(CheckShapeConsistency(mats), "Mats shapes are not consistent.") // Each mat has its own tensor, @@ -45,12 +45,12 @@ FDTensor* FDMatBatch::Tensor() { num_bytes, device, false); } SetTensor(input_cache); - return &fd_tensor; + return fd_tensor.get(); } void FDMatBatch::SetTensor(FDTensor* tensor) { - fd_tensor.SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), - tensor->device, tensor->device_id); + fd_tensor->SetExternalData(tensor->Shape(), tensor->Dtype(), tensor->Data(), + tensor->device, tensor->device_id); has_batched_tensor = true; } diff --git a/fastdeploy/vision/common/processors/mat_batch.h b/fastdeploy/vision/common/processors/mat_batch.h index ed5b408c3..090d8bb59 100644 --- a/fastdeploy/vision/common/processors/mat_batch.h +++ b/fastdeploy/vision/common/processors/mat_batch.h @@ -29,7 +29,7 @@ struct FASTDEPLOY_DECL FDMatBatch { // MatBatch is intialized with a list of mats, // the data is stored in the mats separately. // Call Tensor() function to get a batched 4-dimension tensor. 
- explicit FDMatBatch(std::vector* _mats) { + explicit FDMatBatch(std::vector* _mats) { mats = _mats; layout = FDMatBatchLayout::NHWC; mat_type = ProcLib::OPENCV; @@ -44,7 +44,7 @@ struct FASTDEPLOY_DECL FDMatBatch { #ifdef WITH_GPU cudaStream_t stream = nullptr; #endif - FDTensor fd_tensor; + std::shared_ptr fd_tensor = std::make_shared(); public: // When using CV-CUDA/CUDA, please set input/output cache, diff --git a/fastdeploy/vision/common/processors/normalize_and_permute.cu b/fastdeploy/vision/common/processors/normalize_and_permute.cu index fd482e9d6..7f6320ba4 100644 --- a/fastdeploy/vision/common/processors/normalize_and_permute.cu +++ b/fastdeploy/vision/common/processors/normalize_and_permute.cu @@ -81,7 +81,7 @@ bool NormalizeAndPermute::ImplByCuda(FDMatBatch* mat_batch) { // Prepare output tensor mat_batch->output_cache->Resize(src->Shape(), FDDataType::FP32, - "output_cache", Device::GPU); + "batch_output_cache", Device::GPU); // NHWC -> NCHW std::swap(mat_batch->output_cache->shape[1], mat_batch->output_cache->shape[3]); diff --git a/tutorials/README.md b/tutorials/README.md index 05c4c74a8..8e0f3095c 100644 --- a/tutorials/README.md +++ b/tutorials/README.md @@ -1,13 +1,9 @@ English | [中文](README_CN.md) - # Tutorials - - This directory provides some tutorials for FastDeploy. For other model deployment, please refer to the example [FastDeploy/examples](../examples) directly. - - Intel independent graphics card/integrated graphics card deployment [see intel_gpu](intel_gpu) - - Model multithreaded call [see multi_thread](multi_thread) +- Image decoding, including hardware decoding, e.g. nvJPEG [image_decoder](image_decoder) diff --git a/tutorials/README_CN.md b/tutorials/README_CN.md index 771bb8934..efdd904b9 100644 --- a/tutorials/README_CN.md +++ b/tutorials/README_CN.md @@ -7,3 +7,4 @@ - Intel独立显卡/集成显卡部署 [见intel_gpu](intel_gpu) - 模型多线程调用 [见multi_thread](multi_thread) +- 图片解码(含nvJPEG硬解码) [见image_decoder](image_decoder) diff --git a/tutorials/image_decoder/README.md b/tutorials/image_decoder/README.md new file mode 100644 index 000000000..659afee9c --- /dev/null +++ b/tutorials/image_decoder/README.md @@ -0,0 +1,16 @@ +English | [中文](README_CN.md) + +# Image Decoder + +Currently, we support the following image decoder libs: +- OpenCV +- nvJPEG (Needs NVIDIA GPU, doesn't support Jetson) + +## Example + +- [C++ Example](cpp) +- Python API(WIP) + +## nvJPEG vs. OpenCV performance benchmark + +Refer to: https://github.com/PaddlePaddle/FastDeploy/pull/1288#issuecomment-1427749772 diff --git a/tutorials/image_decoder/README_CN.md b/tutorials/image_decoder/README_CN.md new file mode 100644 index 000000000..d6fcfee76 --- /dev/null +++ b/tutorials/image_decoder/README_CN.md @@ -0,0 +1,16 @@ +简体中文 | [English](README.md) + +# Image Decoder + +图片解码库,目前支持以下图片解码库: +- OpenCV +- nvJPEG (依赖NVIDIA GPU,不支持Jetson) + +## 示例代码 + +- [C++示例](cpp) +- Python API仍在开发中...
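For quick reference, below is a minimal single-image sketch of the decoder API. It is only an illustration, not part of the shipped example: the include path, `fdvis::ImageDecoder`, `Decode`, and `PrintInfo` follow the `cpp/main.cc` sample in this PR, the image path is a placeholder, and the default constructor selects the OpenCV backend.

```cpp
#include "fastdeploy/vision/common/image_decoder/image_decoder.h"

namespace fdvis = fastdeploy::vision;

int main() {
  // Default constructor uses the OpenCV backend; pass
  // fdvis::ImageDecoderLib::NVJPEG to select GPU hardware decoding instead.
  fdvis::ImageDecoder decoder;
  fdvis::FDMat mat;
  decoder.Decode("test.jpeg", &mat);  // "test.jpeg" is a placeholder path
  mat.PrintInfo("");
  return 0;
}
```

For nvJPEG, `BatchDecode` is used instead, and each `FDMat` needs a CUDA stream and an output cache set beforehand, as the `cpp/main.cc` example shows.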
+ +## nvJPEG和OpenCV性能对比数据 + +参见:https://github.com/PaddlePaddle/FastDeploy/pull/1288#issuecomment-1427749772 diff --git a/tutorials/image_decoder/cpp/CMakeLists.txt b/tutorials/image_decoder/cpp/CMakeLists.txt new file mode 100644 index 000000000..d1f90095e --- /dev/null +++ b/tutorials/image_decoder/cpp/CMakeLists.txt @@ -0,0 +1,11 @@ +PROJECT(image_decoder C CXX) +CMAKE_MINIMUM_REQUIRED (VERSION 3.10) + +option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.") + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) + +include_directories(${FASTDEPLOY_INCS}) + +add_executable(image_decoder ${PROJECT_SOURCE_DIR}/main.cc) +target_link_libraries(image_decoder ${FASTDEPLOY_LIBS}) diff --git a/tutorials/image_decoder/cpp/README.md b/tutorials/image_decoder/cpp/README.md new file mode 100644 index 000000000..1a2198b0f --- /dev/null +++ b/tutorials/image_decoder/cpp/README.md @@ -0,0 +1,22 @@ +English | [中文](README_CN.md) + +# Image Decoder C++ Example + +1. [Build FastDeploy](../docs/cn/build_and_install) or download [FastDeploy prebuilt library](../docs/cn/build_and_install/download_prebuilt_libraries.md) + +2. Build example +```bash +mkdir build +cd build + +# [PATH-TO-FASTDEPLOY] is the install directory of FastDeploy +cmake .. -DFASTDEPLOY_INSTALL_DIR=[PATH-TO-FASTDEPLOY] +make -j + +# Download the test image +wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg + +# OpenCV decoder +./image_decoder ILSVRC2012_val_00000010.jpeg 0 +# nvJPEG +./image_decoder ILSVRC2012_val_00000010.jpeg 1 diff --git a/tutorials/image_decoder/cpp/README_CN.md b/tutorials/image_decoder/cpp/README_CN.md new file mode 100644 index 000000000..a62c26630 --- /dev/null +++ b/tutorials/image_decoder/cpp/README_CN.md @@ -0,0 +1,22 @@ +简体中文 | [English](README.md) + +# Image Decoder C++示例 + +1. [编译FastDeploy](../docs/cn/build_and_install), 或直接下载[FastDeploy预编译库](../docs/cn/build_and_install/download_prebuilt_libraries.md) + +2. 编译示例 +```bash +mkdir build +cd build + +# [PATH-TO-FASTDEPLOY]需替换为FastDeploy的安装路径 +cmake .. 
-DFASTDEPLOY_INSTALL_DIR=[PATH-TO-FASTDEPLOY] +make -j + +# 下载测试图片 +wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg + +# OpenCV解码 +./image_decoder ILSVRC2012_val_00000010.jpeg 0 +# nvJPEG +./image_decoder ILSVRC2012_val_00000010.jpeg 1 diff --git a/tutorials/image_decoder/cpp/main.cc b/tutorials/image_decoder/cpp/main.cc new file mode 100644 index 000000000..2193aa8e7 --- /dev/null +++ b/tutorials/image_decoder/cpp/main.cc @@ -0,0 +1,57 @@ +#include "fastdeploy/vision/common/image_decoder/image_decoder.h" + +namespace fdvis = fastdeploy::vision; +namespace fd = fastdeploy; + +void OpenCVImageDecode(const std::string& img_name) { + fdvis::FDMat mat; + auto img_decoder = new fdvis::ImageDecoder(); + img_decoder->Decode(img_name, &mat); + mat.PrintInfo(""); + delete img_decoder; +} + +void NvJpegImageDecode(const std::string& img_name) { + std::vector mats(1); + std::vector caches(1); + + cudaStream_t stream; + cudaStreamCreate(&stream); + // For nvJPEG decoder, we need set stream and output cache for the FDMat + for (size_t i = 0; i < mats.size(); i++) { + mats[i].output_cache = &caches[i]; + mats[i].SetStream(stream); + } + auto img_decoder = new fdvis::ImageDecoder(fdvis::ImageDecoderLib::NVJPEG); + + // This is batch decode API, for single image decode API, + // please refer to OpenCVImageDecode() + img_decoder->BatchDecode({img_name}, &mats); + + for (size_t i = 0; i < mats.size(); i++) { + std::cout << "Mat type: " << mats[i].mat_type << ", " + << "DataType=" << mats[i].Type() << ", " + << "Channel=" << mats[i].Channels() << ", " + << "Height=" << mats[i].Height() << ", " + << "Width=" << mats[i].Width() << std::endl; + } + + cudaStreamDestroy(stream); +} + +int main(int argc, char* argv[]) { + if (argc < 3) { + std::cout << "Usage: image_decoder path/to/image run_option, " + "e.g ./image_decoder ./test.jpeg 0" + << std::endl; + std::cout << "Run_option 0: OpenCV; 1: nvJPEG " << std::endl; + return -1; + } + + if (std::atoi(argv[2]) == 0) { + OpenCVImageDecode(argv[1]); + } else if (std::atoi(argv[2]) == 1) { + NvJpegImageDecode(argv[1]); + } + return 0; +} \ No newline at end of file From c38b7d43771606f39c66e3793f36406d7e4cb099 Mon Sep 17 00:00:00 2001 From: yunyaoXYY <109218879+yunyaoXYY@users.noreply.github.com> Date: Fri, 17 Feb 2023 10:53:51 +0800 Subject: [PATCH 37/41] [Backend] Support onnxruntime DirectML inference. 
(#1304) * Fix links in readme * Fix links in readme * Update PPOCRv2/v3 examples * Update auto compression configs * Add neww quantization support for paddleclas model * Update quantized Yolov6s model download link * Improve PPOCR comments * Add English doc for quantization * Fix PPOCR rec model bug * Add new paddleseg quantization support * Add new paddleseg quantization support * Add new paddleseg quantization support * Add new paddleseg quantization support * Add Ascend model list * Add ascend model list * Add ascend model list * Add ascend model list * Add ascend model list * Add ascend model list * Add ascend model list * Support DirectML in onnxruntime * Support onnxruntime DirectML * Support onnxruntime DirectML * Support onnxruntime DirectML * Support OnnxRuntime DirectML * Support OnnxRuntime DirectML * Support OnnxRuntime DirectML * Support OnnxRuntime DirectML * Support OnnxRuntime DirectML * Support OnnxRuntime DirectML * Support OnnxRuntime DirectML * Support OnnxRuntime DirectML * Remove DirectML vision model example * Imporve OnnxRuntime DirectML * Imporve OnnxRuntime DirectML * fix opencv cmake in Windows * recheck codestyle --- CMakeLists.txt | 1 + cmake/check.cmake | 3 - cmake/onnxruntime.cmake | 10 ++- cmake/opencv.cmake | 12 ++- cmake/summary.cmake | 1 + docs/cn/build_and_install/directml.md | 59 ++++++++++++++ docs/en/build_and_install/directml.md | 57 ++++++++++++++ examples/runtime/cpp/infer_paddle_dml.cc | 77 +++++++++++++++++++ .../classification/paddleclas/cpp/README.md | 16 ++-- .../classification/paddleclas/cpp/infer.cc | 9 ++- .../paddleseg/cpu-gpu/cpp/infer.cc | 0 fastdeploy/core/config.h.in | 6 +- fastdeploy/fastdeploy_model.cc | 60 +++++++++++---- fastdeploy/fastdeploy_model.h | 6 +- .../runtime/backends/ort/ort_backend.cc | 65 ++++++++++++++-- fastdeploy/runtime/backends/ort/ort_backend.h | 8 +- fastdeploy/runtime/enum_variables.cc | 3 + fastdeploy/runtime/enum_variables.h | 25 +++--- fastdeploy/runtime/runtime_option.cc | 2 + fastdeploy/runtime/runtime_option.h | 4 + .../vision/classification/ppcls/model.cc | 2 + fastdeploy/vision/segmentation/ppseg/model.cc | 27 ++++--- 22 files changed, 393 insertions(+), 60 deletions(-) create mode 100644 docs/cn/build_and_install/directml.md create mode 100644 docs/en/build_and_install/directml.md create mode 100644 examples/runtime/cpp/infer_paddle_dml.cc mode change 100644 => 100755 examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc mode change 100755 => 100644 fastdeploy/runtime/runtime_option.h mode change 100644 => 100755 fastdeploy/vision/classification/ppcls/model.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index 6cee4ef72..c234a0f06 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,7 @@ option(ENABLE_CVCUDA "Whether to enable NVIDIA CV-CUDA to boost image preprocess option(ENABLE_ENCRYPTION "Whether to enable ENCRYPTION." OFF) option(ENABLE_BENCHMARK "Whether to enable Benchmark mode." OFF) option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF) +option(WITH_DIRECTML "Whether to compile for onnxruntime DirectML deploy." OFF) option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF) option(WITH_KUNLUNXIN "Whether to compile for KunlunXin XPU deploy." OFF) option(WITH_TESTING "Whether to compile with unittest." 
OFF) diff --git a/cmake/check.cmake b/cmake/check.cmake index 690149a9e..5e0ce1794 100644 --- a/cmake/check.cmake +++ b/cmake/check.cmake @@ -12,9 +12,6 @@ if(WIN32) if(ENABLE_POROS_BACKEND) message(FATAL_ERROR "-DENABLE_POROS_BACKEND=ON doesn't support on non 64-bit system now.") endif() - if(ENABLE_VISION) - message(FATAL_ERROR "-DENABLE_VISION=ON doesn't support on non 64-bit system now.") - endif() endif() endif() diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 48b4675f1..b823f734b 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -44,14 +44,20 @@ set(CMAKE_BUILD_RPATH "${CMAKE_BUILD_RPATH}" "${ONNXRUNTIME_LIB_DIR}") set(ONNXRUNTIME_VERSION "1.12.0") set(ONNXRUNTIME_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs/") -if(WIN32) +if(WIN32) if(WITH_GPU) set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-gpu-${ONNXRUNTIME_VERSION}.zip") + elseif(WITH_DIRECTML) + set(ONNXRUNTIME_FILENAME "onnxruntime-directml-win-x64.zip") else() set(ONNXRUNTIME_FILENAME "onnxruntime-win-x64-${ONNXRUNTIME_VERSION}.zip") endif() if(NOT CMAKE_CL_64) - set(ONNXRUNTIME_FILENAME "onnxruntime-win-x86-${ONNXRUNTIME_VERSION}.zip") + if(WITH_DIRECTML) + set(ONNXRUNTIME_FILENAME "onnxruntime-directml-win-x86.zip") + else() + set(ONNXRUNTIME_FILENAME "onnxruntime-win-x86-${ONNXRUNTIME_VERSION}.zip") + endif() endif() elseif(APPLE) if(CURRENT_OSX_ARCH MATCHES "arm64") diff --git a/cmake/opencv.cmake b/cmake/opencv.cmake index 9968c129c..e4a63f42b 100755 --- a/cmake/opencv.cmake +++ b/cmake/opencv.cmake @@ -15,7 +15,11 @@ set(COMPRESSED_SUFFIX ".tgz") if(WIN32) - set(OPENCV_FILENAME "opencv-win-x64-3.4.16") + if(NOT CMAKE_CL_64) + set(OPENCV_FILENAME "opencv-win-x86-3.4.16") + else() + set(OPENCV_FILENAME "opencv-win-x64-3.4.16") + endif() set(COMPRESSED_SUFFIX ".zip") elseif(APPLE) if(CURRENT_OSX_ARCH MATCHES "arm64") @@ -51,6 +55,12 @@ endif() set(OPENCV_INSTALL_DIR ${THIRD_PARTY_PATH}/install/) if(ANDROID) set(OPENCV_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs") +elseif(WIN32) + if(NOT CMAKE_CL_64) + set(OPENCV_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs") + else() + set(OPENCV_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs") + endif() else() # TODO: use fastdeploy/third_libs instead. set(OPENCV_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs") endif() diff --git a/cmake/summary.cmake b/cmake/summary.cmake index 6bda8fdcb..45cc837a8 100755 --- a/cmake/summary.cmake +++ b/cmake/summary.cmake @@ -43,6 +43,7 @@ function(fastdeploy_summary) message(STATUS " WITH_GPU : ${WITH_GPU}") message(STATUS " WITH_TESTING : ${WITH_TESTING}") message(STATUS " WITH_ASCEND : ${WITH_ASCEND}") + message(STATUS " WITH_DIRECTML : ${WITH_DIRECTML}") message(STATUS " WITH_TIMVX : ${WITH_TIMVX}") message(STATUS " WITH_KUNLUNXIN : ${WITH_KUNLUNXIN}") message(STATUS " WITH_CAPI : ${WITH_CAPI}") diff --git a/docs/cn/build_and_install/directml.md b/docs/cn/build_and_install/directml.md new file mode 100644 index 000000000..c1d701a84 --- /dev/null +++ b/docs/cn/build_and_install/directml.md @@ -0,0 +1,59 @@ +[English](../../en/build_and_install/directml.md) | 简体中文 + +# DirectML部署库编译 +Direct Machine Learning (DirectML) 是Windows系统上用于机器学习的一款高性能, 提供硬件加速的 DirectX 12 库. +目前, Fastdeploy的ONNX Runtime后端已集成DirectML,让用户可以在支持DirectX 12的 AMD/Intel/Nvidia/Qualcomm的GPU上部署模型. + +更多详细介绍可见: +- [ONNX Runtime DirectML Execution Provider](https://onnxruntime.ai/docs/execution-providers/DirectML-ExecutionProvider.html) + +# DirectML使用需求 +- 编译需求: Visuald Studio 2017 及其以上工具链. 
+- 操作系统: Windows10, 1903 版本, 及其更新版本. (DirectML为操作系统的组成部分, 无需单独安装) +- 硬件需求: 支持DirectX 12的显卡, 例如, AMD GCN 第一代及以上版本/ Intel Haswell HD集成显卡及以上版本/Nvidia Kepler架构及以上版本/ Qualcomm Adreno 600及以上版本. + +# 编译DirectML部署库 +DirectML是基于ONNX Runtime后端集成, 所以要使用DirectML, 用户需要打开编译ONNX Runtime的选项. 同时, FastDeploy的DirectML支持x64/x86(Win32)架构的程序构建. + + +x64示例, 在Windows菜单中,找到`x64 Native Tools Command Prompt for VS 2019`打开,执行如下命令 +```bat +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy +mkdir build && cd build + +cmake .. -G "Visual Studio 16 2019" -A x64 ^ + -DWITH_DIRECTML=ON ^ + -DENABLE_ORT_BACKEND=ON ^ + -DENABLE_VISION=ON ^ + -DCMAKE_INSTALL_PREFIX="D:\Paddle\compiled_fastdeploy" ^ + +msbuild fastdeploy.sln /m /p:Configuration=Release /p:Platform=x64 +msbuild INSTALL.vcxproj /m /p:Configuration=Release /p:Platform=x64 +``` +编译完成后,即在`CMAKE_INSTALL_PREFIX`指定的目录下生成C++推理库. +如您使用CMake GUI可参考文档[Windows使用CMakeGUI + Visual Studio 2019 IDE编译](../faq/build_on_win_with_gui.md) + + +x86(Win32)示例, 在Windows菜单中,找到`x86 Native Tools Command Prompt for VS 2019`打开,执行如下命令 +```bat +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy +mkdir build && cd build + +cmake .. -G "Visual Studio 16 2019" -A Win32 ^ + -DWITH_DIRECTML=ON ^ + -DENABLE_ORT_BACKEND=ON ^ + -DENABLE_VISION=ON ^ + -DCMAKE_INSTALL_PREFIX="D:\Paddle\compiled_fastdeploy" ^ + +msbuild fastdeploy.sln /m /p:Configuration=Release /p:Platform=Win32 +msbuild INSTALL.vcxproj /m /p:Configuration=Release /p:Platform=Win32 +``` +编译完成后,即在`CMAKE_INSTALL_PREFIX`指定的目录下生成C++推理库. +如您使用CMake GUI可参考文档[Windows使用CMakeGUI + Visual Studio 2019 IDE编译](../faq/build_on_win_with_gui.md) + +# 使用DirectML库 +DirectML编译库的使用方式, 和其他硬件在Windows上使用的方式一样, 参考以下链接. +- [FastDeploy C++库在Windows上的多种使用方式](../faq/use_sdk_on_windows_build.md) +- [在 Windows 使用 FastDeploy C++ SDK](../faq/use_sdk_on_windows.md) diff --git a/docs/en/build_and_install/directml.md b/docs/en/build_and_install/directml.md new file mode 100644 index 000000000..bc68ac702 --- /dev/null +++ b/docs/en/build_and_install/directml.md @@ -0,0 +1,57 @@ +English | [中文](../../cn/build_and_install/directml.md) + +# How to Build DirectML Deployment Environment +Direct Machine Learning (DirectML) is a high-performance, hardware-accelerated DirectX 12 library for machine learning on Windows systems. +Currently, Fastdeploy's ONNX Runtime backend has DirectML integrated, allowing users to deploy models on AMD/Intel/Nvidia/Qualcomm GPUs with DirectX 12 support. + +More details: +- [ONNX Runtime DirectML Execution Provider](https://onnxruntime.ai/docs/execution-providers/DirectML-ExecutionProvider.html) + +# DirectML requirements +- Compilation requirements: Visual Studio 2017 toolchain and above. +- Operating system: Windows 10, version 1903, and newer. (DirectML is part of the operating system and does not need to be installed separately) +- Hardware requirements: DirectX 12 supported graphics cards, e.g., AMD GCN 1st generation and above/ Intel Haswell HD integrated graphics and above/ Nvidia Kepler architecture and above/ Qualcomm Adreno 600 and above. + +# How to Build and Install DirectML C++ SDK +The DirectML is integrated with the ONNX Runtime backend, so to use DirectML, users need to turn on the option to compile ONNX Runtime. Also, FastDeploy's DirectML supports building programs for x64/x86 (Win32) architectures. 
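As a point of reference before the build walk-through, the following minimal sketch shows how a DirectML-enabled build is selected at runtime. It mirrors the `examples/runtime/cpp/infer_paddle_dml.cc` sample added in this PR; the model paths are placeholders, and the snippet assumes the SDK was built with `-DWITH_DIRECTML=ON` and the ONNX Runtime backend enabled.

```cpp
#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

int main() {
  fd::RuntimeOption option;
  // Placeholder paths; point these at your exported Paddle inference model.
  option.SetModelPath("mobilenetv2/inference.pdmodel",
                      "mobilenetv2/inference.pdiparams",
                      fd::ModelFormat::PADDLE);
  option.UseOrtBackend();  // DirectML runs through the ONNX Runtime backend
  option.UseDirectML();    // requires a build with -DWITH_DIRECTML=ON
  fd::Runtime runtime;
  if (!runtime.Init(option)) {
    return -1;
  }
  return 0;
}
```

If the ONNX Runtime library in the build does not expose `DmlExecutionProvider`, the backend logs a warning and falls back to the CPU provider (see `BuildOption` in `ort_backend.cc`).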
+ +For the x64 example, in the Windows menu, find `x64 Native Tools Command Prompt for VS 2019` and open it by executing the following command +```bat +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy +mkdir build && cd build + +cmake .. -G "Visual Studio 16 2019" -A x64 ^ + -DWITH_DIRECTML=ON ^ + -DENABLE_ORT_BACKEND=ON ^ + -DENABLE_VISION=ON ^ + -DCMAKE_INSTALL_PREFIX="D:\Paddle\compiled_fastdeploy" ^ + +msbuild fastdeploy.sln /m /p:Configuration=Release /p:Platform=x64 +msbuild INSTALL.vcxproj /m /p:Configuration=Release /p:Platform=x64 +``` +Once compiled, the C++ inference library is generated in the directory specified by `CMAKE_INSTALL_PREFIX` +If you use CMake GUI, please refer to [How to Compile with CMakeGUI + Visual Studio 2019 IDE on Windows](../faq/build_on_win_with_gui.md) + + +For the x86(Win32) example, in the Windows menu, find `x86 Native Tools Command Prompt for VS 2019` and open it by executing the following command +```bat +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy +mkdir build && cd build + +cmake .. -G "Visual Studio 16 2019" -A Win32 ^ + -DWITH_DIRECTML=ON ^ + -DENABLE_ORT_BACKEND=ON ^ + -DENABLE_VISION=ON ^ + -DCMAKE_INSTALL_PREFIX="D:\Paddle\compiled_fastdeploy" ^ + +msbuild fastdeploy.sln /m /p:Configuration=Release /p:Platform=Win32 +msbuild INSTALL.vcxproj /m /p:Configuration=Release /p:Platform=Win32 +``` +Once compiled, the C++ inference library is generated in the directory specified by `CMAKE_INSTALL_PREFIX` +If you use CMake GUI, please refer to [How to Compile with CMakeGUI + Visual Studio 2019 IDE on Windows](../faq/build_on_win_with_gui.md) + +# How to use compiled DirectML SDK. +The DirectML compiled library can be used in the same way as any other hardware on Windows, see the following link. +- [Using the FastDeploy C++ SDK on Windows Platform](../faq/use_sdk_on_windows.md) diff --git a/examples/runtime/cpp/infer_paddle_dml.cc b/examples/runtime/cpp/infer_paddle_dml.cc new file mode 100644 index 000000000..dfa71a661 --- /dev/null +++ b/examples/runtime/cpp/infer_paddle_dml.cc @@ -0,0 +1,77 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/runtime.h" + +namespace fd = fastdeploy; + +int main(int argc, char* argv[]) { + // create option + fd::RuntimeOption runtime_option; + + // model and param files + std::string model_file = "mobilenetv2/inference.pdmodel"; + std::string params_file = "mobilenetv2/inference.pdiparams"; + + // read model From disk. 
+ // runtime_option.SetModelPath(model_file, params_file, + // fd::ModelFormat::PADDLE); + + // read model from buffer + std::string model_buffer, params_buffer; + fd::ReadBinaryFromFile(model_file, &model_buffer); + fd::ReadBinaryFromFile(params_file, ¶ms_buffer); + runtime_option.SetModelBuffer(model_buffer, params_buffer, + fd::ModelFormat::PADDLE); + + // setup other option + runtime_option.SetCpuThreadNum(12); + // use ONNX Runtime DirectML + runtime_option.UseOrtBackend(); + runtime_option.UseDirectML(); + + // init runtime + std::unique_ptr runtime = + std::unique_ptr(new fd::Runtime()); + if (!runtime->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << model_file << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << model_file << std::endl; + } + // init input tensor shape + fd::TensorInfo info = runtime->GetInputInfo(0); + info.shape = {1, 3, 224, 224}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + std::vector inputs_data; + inputs_data.resize(1 * 3 * 224 * 224); + for (size_t i = 0; i < inputs_data.size(); ++i) { + inputs_data[i] = std::rand() % 1000 / 1000.0f; + } + input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, + inputs_data.data()); + + // get input name + input_tensors[0].name = info.name; + + runtime->Infer(input_tensors, &output_tensors); + + output_tensors[0].PrintInfo(); + return 0; +} \ No newline at end of file diff --git a/examples/vision/classification/paddleclas/cpp/README.md b/examples/vision/classification/paddleclas/cpp/README.md index 8d85c24fc..f0e6fc7ac 100755 --- a/examples/vision/classification/paddleclas/cpp/README.md +++ b/examples/vision/classification/paddleclas/cpp/README.md @@ -1,7 +1,7 @@ English | [简体中文](README_CN.md) # PaddleClas C++ Deployment Example -This directory provides examples that `infer.cc` fast finishes the deployment of PaddleClas models on CPU/GPU and GPU accelerated by TensorRT. +This directory provides examples that `infer.cc` fast finishes the deployment of PaddleClas models on CPU/GPU and GPU accelerated by TensorRT. Before deployment, two steps require confirmation. @@ -13,13 +13,13 @@ Taking ResNet50_vd inference on Linux as an example, the compilation test can be ```bash mkdir build cd build -# Download FastDeploy precompiled library. Users can choose your appropriate version in the`FastDeploy Precompiled Library` mentioned above +# Download FastDeploy precompiled library. Users can choose your appropriate version in the`FastDeploy Precompiled Library` mentioned above wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz tar xvf fastdeploy-linux-x64-x.x.x.tgz cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x make -j -# Download ResNet50_vd model file and test images +# Download ResNet50_vd model file and test images wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz tar -xvf ResNet50_vd_infer.tgz wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg @@ -35,12 +35,14 @@ wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/Ima ./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 3 # KunlunXin XPU inference ./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 4 +# Ascend inference +./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 5 ``` -The above command works for Linux or MacOS. 
Refer to +The above command works for Linux or MacOS. Refer to - [How to use FastDeploy C++ SDK in Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md) for SDK use-pattern in Windows -## PaddleClas C++ Interface +## PaddleClas C++ Interface ### PaddleClas Class @@ -57,8 +59,8 @@ PaddleClas model loading and initialization, where model_file and params_file ar **Parameter** -> * **model_file**(str): Model file path -> * **params_file**(str): Parameter file path +> * **model_file**(str): Model file path +> * **params_file**(str): Parameter file path > * **config_file**(str): Inference deployment configuration file > * **runtime_option**(RuntimeOption): Backend inference configuration. None by default. (use the default configuration) > * **model_format**(ModelFormat): Model format. Paddle format by default diff --git a/examples/vision/classification/paddleclas/cpp/infer.cc b/examples/vision/classification/paddleclas/cpp/infer.cc index 90c5557c2..b0065fdb9 100755 --- a/examples/vision/classification/paddleclas/cpp/infer.cc +++ b/examples/vision/classification/paddleclas/cpp/infer.cc @@ -96,7 +96,8 @@ void IpuInfer(const std::string& model_dir, const std::string& image_file) { std::cout << res.Str() << std::endl; } -void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) { +void KunlunXinInfer(const std::string& model_dir, + const std::string& image_file) { auto model_file = model_dir + sep + "inference.pdmodel"; auto params_file = model_dir + sep + "inference.pdiparams"; auto config_file = model_dir + sep + "inference_cls.yaml"; @@ -152,7 +153,7 @@ void AscendInfer(const std::string& model_dir, const std::string& image_file) { auto model_file = model_dir + sep + "inference.pdmodel"; auto params_file = model_dir + sep + "inference.pdiparams"; auto config_file = model_dir + sep + "inference_cls.yaml"; - + auto option = fastdeploy::RuntimeOption(); option.UseAscend(); @@ -172,14 +173,14 @@ void AscendInfer(const std::string& model_dir, const std::string& image_file) { std::cout << res.Str() << std::endl; } - int main(int argc, char* argv[]) { if (argc < 4) { std::cout << "Usage: infer_demo path/to/model path/to/image run_option, " "e.g ./infer_demo ./ResNet50_vd ./test.jpeg 0" << std::endl; std::cout << "The data type of run_option is int, 0: run with cpu; 1: run " - "with gpu; 2: run with gpu and use tensorrt backend; 3: run with ipu; 4: run with kunlunxin." + "with gpu; 2: run with gpu and use tensorrt backend; 3: run " + "with ipu; 4: run with kunlunxin." 
<< std::endl; return -1; } diff --git a/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc b/examples/vision/segmentation/paddleseg/cpu-gpu/cpp/infer.cc old mode 100644 new mode 100755 diff --git a/fastdeploy/core/config.h.in b/fastdeploy/core/config.h.in index 5593f9fd8..4da8594b8 100755 --- a/fastdeploy/core/config.h.in +++ b/fastdeploy/core/config.h.in @@ -41,6 +41,10 @@ #cmakedefine WITH_GPU #endif +#ifndef WITH_DIRECTML +#cmakedefine WITH_DIRECTML +#endif + #ifndef ENABLE_TRT_BACKEND #cmakedefine ENABLE_TRT_BACKEND #endif @@ -59,4 +63,4 @@ #ifndef ENABLE_BENCHMARK #cmakedefine ENABLE_BENCHMARK -#endif \ No newline at end of file +#endif diff --git a/fastdeploy/fastdeploy_model.cc b/fastdeploy/fastdeploy_model.cc index d909a6138..2eb25e383 100644 --- a/fastdeploy/fastdeploy_model.cc +++ b/fastdeploy/fastdeploy_model.cc @@ -41,20 +41,19 @@ bool CheckBackendSupported(const std::vector& backends, return false; } -bool FastDeployModel::IsSupported(const std::vector& backends, +bool FastDeployModel::IsSupported(const std::vector& backends, Backend backend) { #ifdef ENABLE_BENCHMARK if (runtime_option.benchmark_option.enable_profile) { - FDWARNING << "In benchmark mode, we don't check to see if " - << "the backend [" << backend - << "] is supported for current model!" - << std::endl; + FDWARNING << "In benchmark mode, we don't check to see if " + << "the backend [" << backend + << "] is supported for current model!" << std::endl; return true; } - return CheckBackendSupported(backends, backend); -#else return CheckBackendSupported(backends, backend); -#endif +#else + return CheckBackendSupported(backends, backend); +#endif } bool FastDeployModel::InitRuntimeWithSpecifiedBackend() { @@ -70,6 +69,7 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() { bool use_sophgotpu = (runtime_option.device == Device::SOPHGOTPUD); bool use_timvx = (runtime_option.device == Device::TIMVX); bool use_ascend = (runtime_option.device == Device::ASCEND); + bool use_directml = (runtime_option.device == Device::DIRECTML); bool use_kunlunxin = (runtime_option.device == Device::KUNLUNXIN); if (use_gpu) { @@ -107,6 +107,13 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() { << " is not supported." << std::endl; return false; } + } else if (use_directml) { + if (!IsSupported(valid_directml_backends, runtime_option.backend)) { + FDERROR << "The valid directml backends of model " << ModelName() + << " are " << Str(valid_directml_backends) << ", " + << runtime_option.backend << " is not supported." << std::endl; + return false; + } } else if (use_kunlunxin) { if (!IsSupported(valid_kunlunxin_backends, runtime_option.backend)) { FDERROR << "The valid kunlunxin backends of model " << ModelName() @@ -155,6 +162,8 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() { return CreateTimVXBackend(); } else if (runtime_option.device == Device::ASCEND) { return CreateASCENDBackend(); + } else if (runtime_option.device == Device::DIRECTML) { + return CreateDirectMLBackend(); } else if (runtime_option.device == Device::KUNLUNXIN) { return CreateKunlunXinBackend(); } else if (runtime_option.device == Device::SOPHGOTPUD) { @@ -168,8 +177,9 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() { return false; #endif } - FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND now." - << std::endl; + FDERROR + << "Only support CPU/GPU/IPU/RKNPU/TIMVX/KunlunXin/ASCEND/DirectML now." 
+ << std::endl; return false; } @@ -350,6 +360,30 @@ bool FastDeployModel::CreateASCENDBackend() { return false; } +bool FastDeployModel::CreateDirectMLBackend() { + if (valid_directml_backends.size() == 0) { + FDERROR << "There's no valid directml backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_directml_backends.size(); ++i) { + if (!IsBackendAvailable(valid_directml_backends[i])) { + continue; + } + runtime_option.backend = valid_directml_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Found no valid directml backend for model: " << ModelName() + << std::endl; + return false; +} + bool FastDeployModel::CreateIpuBackend() { if (valid_ipu_backends.size() == 0) { FDERROR << "There's no valid ipu backends for model: " << ModelName() @@ -384,13 +418,13 @@ bool FastDeployModel::Infer(std::vector& input_tensors, tc.End(); if (time_of_runtime_.size() > 50000) { FDWARNING << "There are already 50000 records of runtime, will force to " - "disable record time of runtime now." + "disable record time of runtime now." << std::endl; enable_record_time_of_runtime_ = false; } time_of_runtime_.push_back(tc.Duration()); } - + return ret; } @@ -434,7 +468,7 @@ std::map FastDeployModel::PrintStatisInfoOfRuntime() { statis_info_of_runtime_dict["warmup_iter"] = warmup_iter; statis_info_of_runtime_dict["avg_time"] = avg_time; statis_info_of_runtime_dict["iterations"] = time_of_runtime_.size(); - + return statis_info_of_runtime_dict; } } // namespace fastdeploy diff --git a/fastdeploy/fastdeploy_model.h b/fastdeploy/fastdeploy_model.h index 037bb2192..bd7320912 100755 --- a/fastdeploy/fastdeploy_model.h +++ b/fastdeploy/fastdeploy_model.h @@ -45,6 +45,9 @@ class FASTDEPLOY_DECL FastDeployModel { /** Model's valid timvx backends. This member defined all the timvx backends have successfully tested for the model */ std::vector valid_timvx_backends = {}; + /** Model's valid directml backends. This member defined all the onnxruntime directml backends have successfully tested for the model + */ + std::vector valid_directml_backends = {}; /** Model's valid ascend backends. This member defined all the cann backends have successfully tested for the model */ std::vector valid_ascend_backends = {}; @@ -117,7 +120,7 @@ class FASTDEPLOY_DECL FastDeployModel { */ virtual double GetProfileTime() { return runtime_->GetProfileTime(); - } + } /** \brief Release reused input/output buffers */ @@ -158,6 +161,7 @@ class FASTDEPLOY_DECL FastDeployModel { bool CreateTimVXBackend(); bool CreateKunlunXinBackend(); bool CreateASCENDBackend(); + bool CreateDirectMLBackend(); bool IsSupported(const std::vector& backends, Backend backend); diff --git a/fastdeploy/runtime/backends/ort/ort_backend.cc b/fastdeploy/runtime/backends/ort/ort_backend.cc index 58c449cc6..ae5e8f3ed 100644 --- a/fastdeploy/runtime/backends/ort/ort_backend.cc +++ b/fastdeploy/runtime/backends/ort/ort_backend.cc @@ -1,3 +1,4 @@ + // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,6 +14,7 @@ // limitations under the License. 
#include "fastdeploy/runtime/backends/ort/ort_backend.h" + #include "fastdeploy/core/float16.h" #include "fastdeploy/runtime/backends/ort/ops/adaptive_pool2d.h" #include "fastdeploy/runtime/backends/ort/ops/multiclass_nms.h" @@ -24,13 +26,12 @@ #include - namespace fastdeploy { std::vector OrtBackend::custom_operators_ = std::vector(); -void OrtBackend::BuildOption(const OrtBackendOption& option) { +bool OrtBackend::BuildOption(const OrtBackendOption& option) { option_ = option; if (option.graph_optimization_level >= 0) { session_options_.SetGraphOptimizationLevel( @@ -45,6 +46,53 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) { if (option.execution_mode >= 0) { session_options_.SetExecutionMode(ExecutionMode(option.execution_mode)); } + +#ifdef WITH_DIRECTML + // If use DirectML + if (option.device == Device::DIRECTML) { + auto all_providers = Ort::GetAvailableProviders(); + bool support_dml = false; + std::string providers_msg = ""; + for (size_t i = 0; i < all_providers.size(); ++i) { + providers_msg = providers_msg + all_providers[i] + ", "; + if (all_providers[i] == "DmlExecutionProvider") { + support_dml = true; + } + } + + if (!support_dml) { + FDWARNING << "Compiled fastdeploy with onnxruntime doesn't " + "support DirectML, the available providers are " + << providers_msg << "will fallback to CPUExecutionProvider." + << "Please check if DirectML is installed successfully." + << std::endl; + option_.device = Device::CPU; + } else { + // Must set as below when use dml. + session_options_.DisableMemPattern(); + session_options_.SetExecutionMode(ExecutionMode(0)); + + // DML session_option + OrtApi const& ortApi = Ort::GetApi(); + const OrtDmlApi* ortDmlApi; + ortApi.GetExecutionProviderApi( + "DML", ORT_API_VERSION, reinterpret_cast(&ortDmlApi)); + OrtStatus* onnx_dml_status = + ortDmlApi->SessionOptionsAppendExecutionProvider_DML(session_options_, + 0); + if (onnx_dml_status != nullptr) { + FDERROR + << "DirectML is not support in your machine, the program will exit." + << std::endl; + ortApi.ReleaseStatus(onnx_dml_status); + return false; + } + } + return true; + } +#endif + + // CUDA if (option.device == Device::GPU) { auto all_providers = Ort::GetAvailableProviders(); bool support_cuda = false; @@ -70,11 +118,14 @@ void OrtBackend::BuildOption(const OrtBackendOption& option) { } session_options_.AppendExecutionProvider_CUDA(cuda_options); } + return true; } + return true; } bool OrtBackend::Init(const RuntimeOption& option) { - if (option.device != Device::CPU && option.device != Device::GPU) { + if (option.device != Device::CPU && option.device != Device::GPU && + option.device != Device::DIRECTML) { FDERROR << "Backend::ORT only supports Device::CPU/Device::GPU, but now its " << option.device << "." << std::endl; @@ -169,7 +220,11 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file, return false; } - BuildOption(option); + if (!BuildOption(option)) { + FDERROR << "Create Ort option fail." 
<< std::endl; + return false; + } + InitCustomOperators(); session_ = {env_, model_file.data(), model_file.size(), session_options_}; binding_ = std::make_shared(session_); @@ -355,4 +410,4 @@ void OrtBackend::InitCustomOperators() { #endif } -} // namespace fastdeploy +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/runtime/backends/ort/ort_backend.h b/fastdeploy/runtime/backends/ort/ort_backend.h index e0caf48a3..543b125e9 100644 --- a/fastdeploy/runtime/backends/ort/ort_backend.h +++ b/fastdeploy/runtime/backends/ort/ort_backend.h @@ -24,6 +24,10 @@ #include "fastdeploy/runtime/backends/ort/option.h" #include "onnxruntime_cxx_api.h" // NOLINT +#ifdef WITH_DIRECTML +#include "dml_provider_factory.h" // NOLINT +#endif + namespace fastdeploy { struct OrtValueInfo { @@ -37,7 +41,7 @@ class OrtBackend : public BaseBackend { OrtBackend() {} virtual ~OrtBackend() = default; - void BuildOption(const OrtBackendOption& option); + bool BuildOption(const OrtBackendOption& option); bool Init(const RuntimeOption& option); @@ -54,7 +58,7 @@ class OrtBackend : public BaseBackend { std::vector GetOutputInfos() override; static std::vector custom_operators_; void InitCustomOperators(); - + private: bool InitFromPaddle(const std::string& model_buffer, const std::string& params_buffer, diff --git a/fastdeploy/runtime/enum_variables.cc b/fastdeploy/runtime/enum_variables.cc index 22afeb9cd..c57636057 100644 --- a/fastdeploy/runtime/enum_variables.cc +++ b/fastdeploy/runtime/enum_variables.cc @@ -61,6 +61,9 @@ std::ostream& operator<<(std::ostream& out, const Device& d) { case Device::ASCEND: out << "Device::ASCEND"; break; + case Device::DIRECTML: + out << "Device::DIRECTML"; + break; default: out << "Device::UNKOWN"; } diff --git a/fastdeploy/runtime/enum_variables.h b/fastdeploy/runtime/enum_variables.h index 7a96e60b4..c608504c5 100644 --- a/fastdeploy/runtime/enum_variables.h +++ b/fastdeploy/runtime/enum_variables.h @@ -29,7 +29,8 @@ namespace fastdeploy { /*! Inference backend supported in FastDeploy */ enum Backend { UNKNOWN, ///< Unknown inference backend - ORT, ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU + ORT, //< ONNX Runtime, support Paddle/ONNX format model, + //< CPU/ Nvidia GPU DirectML TRT, ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU @@ -58,7 +59,8 @@ enum FASTDEPLOY_DECL Device { TIMVX, KUNLUNXIN, ASCEND, - SOPHGOTPUD + SOPHGOTPUD, + DIRECTML }; /*! Deep learning model format */ @@ -93,13 +95,15 @@ static std::map> {Device::TIMVX, {Backend::LITE}}, {Device::KUNLUNXIN, {Backend::LITE}}, {Device::ASCEND, {Backend::LITE}}, - {Device::SOPHGOTPUD, {Backend::SOPHGOTPU}} + {Device::SOPHGOTPUD, {Backend::SOPHGOTPU}}, + {Device::DIRECTML, {Backend::ORT}} }; inline bool Supported(ModelFormat format, Backend backend) { auto iter = s_default_backends_by_format.find(format); if (iter == s_default_backends_by_format.end()) { - FDERROR << "Didn't find format is registered in s_default_backends_by_format." << std::endl; + FDERROR << "Didn't find format is registered in " << + "s_default_backends_by_format." 
<< std::endl; return false; } for (size_t i = 0; i < iter->second.size(); ++i) { @@ -107,15 +111,17 @@ inline bool Supported(ModelFormat format, Backend backend) { return true; } } - std::string msg = Str(iter->second); - FDERROR << backend << " only supports " << msg << ", but now it's " << format << "." << std::endl; + std::string msg = Str(iter->second); + FDERROR << backend << " only supports " << msg << ", but now it's " + << format << "." << std::endl; return false; } inline bool Supported(Device device, Backend backend) { auto iter = s_default_backends_by_device.find(device); if (iter == s_default_backends_by_device.end()) { - FDERROR << "Didn't find device is registered in s_default_backends_by_device." << std::endl; + FDERROR << "Didn't find device is registered in " << + "s_default_backends_by_device." << std::endl; return false; } for (size_t i = 0; i < iter->second.size(); ++i) { @@ -123,8 +129,9 @@ inline bool Supported(Device device, Backend backend) { return true; } } - std::string msg = Str(iter->second); - FDERROR << backend << " only supports " << msg << ", but now it's " << device << "." << std::endl; + std::string msg = Str(iter->second); + FDERROR << backend << " only supports " << msg << ", but now it's " + << device << "." << std::endl; return false; } diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc index ff687e4f6..8d18637a8 100644 --- a/fastdeploy/runtime/runtime_option.cc +++ b/fastdeploy/runtime/runtime_option.cc @@ -93,6 +93,8 @@ void RuntimeOption::UseAscend() { paddle_lite_option.device = device; } +void RuntimeOption::UseDirectML() { device = Device::DIRECTML; } + void RuntimeOption::UseSophgo() { device = Device::SOPHGOTPUD; UseSophgoBackend(); diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h old mode 100755 new mode 100644 index 9bb3b4539..4b7af8a99 --- a/fastdeploy/runtime/runtime_option.h +++ b/fastdeploy/runtime/runtime_option.h @@ -78,6 +78,10 @@ struct FASTDEPLOY_DECL RuntimeOption { void UseTimVX(); /// Use Huawei Ascend to inference void UseAscend(); + + /// Use onnxruntime DirectML to inference + void UseDirectML(); + /// Use Sophgo to inference void UseSophgo(); /// \brief Turn on KunlunXin XPU. diff --git a/fastdeploy/vision/classification/ppcls/model.cc b/fastdeploy/vision/classification/ppcls/model.cc old mode 100644 new mode 100755 index d52eeace9..df4d89eb7 --- a/fastdeploy/vision/classification/ppcls/model.cc +++ b/fastdeploy/vision/classification/ppcls/model.cc @@ -34,12 +34,14 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file, valid_ascend_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE}; valid_ipu_backends = {Backend::PDINFER}; + valid_directml_backends = {Backend::ORT}; } else if (model_format == ModelFormat::SOPHGO) { valid_sophgonpu_backends = {Backend::SOPHGOTPU}; } else { valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; valid_gpu_backends = {Backend::ORT, Backend::TRT}; valid_rknpu_backends = {Backend::RKNPU2}; + valid_directml_backends = {Backend::ORT}; } runtime_option = custom_option; diff --git a/fastdeploy/vision/segmentation/ppseg/model.cc b/fastdeploy/vision/segmentation/ppseg/model.cc index 54f978828..2f5d45c5f 100755 --- a/fastdeploy/vision/segmentation/ppseg/model.cc +++ b/fastdeploy/vision/segmentation/ppseg/model.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "fastdeploy/vision/segmentation/ppseg/model.h" + #include "fastdeploy/utils/unique_ptr.h" namespace fastdeploy { @@ -20,22 +21,23 @@ namespace vision { namespace segmentation { PaddleSegModel::PaddleSegModel(const std::string& model_file, - const std::string& params_file, - const std::string& config_file, - const RuntimeOption& custom_option, - const ModelFormat& model_format) : preprocessor_(config_file), - postprocessor_(config_file) { - if(model_format == ModelFormat::SOPHGO) { + const std::string& params_file, + const std::string& config_file, + const RuntimeOption& custom_option, + const ModelFormat& model_format) + : preprocessor_(config_file), postprocessor_(config_file) { + if (model_format == ModelFormat::SOPHGO) { valid_sophgonpu_backends = {Backend::SOPHGOTPU}; - } - else{ - valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, Backend::LITE}; + } else { + valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, + Backend::LITE}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; } valid_rknpu_backends = {Backend::RKNPU2}; valid_timvx_backends = {Backend::LITE}; valid_kunlunxin_backends = {Backend::LITE}; valid_ascend_backends = {Backend::LITE}; + valid_directml_backends = {Backend::ORT}; runtime_option = custom_option; runtime_option.model_format = model_format; @@ -44,8 +46,9 @@ PaddleSegModel::PaddleSegModel(const std::string& model_file, initialized = Initialize(); } -std::unique_ptr PaddleSegModel::Clone() const { - std::unique_ptr clone_model = fastdeploy::utils::make_unique(PaddleSegModel(*this)); +std::unique_ptr PaddleSegModel::Clone() const { + std::unique_ptr clone_model = + fastdeploy::utils::make_unique(PaddleSegModel(*this)); clone_model->SetRuntime(clone_model->CloneRuntime()); return clone_model; } @@ -59,7 +62,7 @@ bool PaddleSegModel::Initialize() { } bool PaddleSegModel::Predict(cv::Mat* im, SegmentationResult* result) { - return Predict(*im, result); + return Predict(*im, result); } bool PaddleSegModel::Predict(const cv::Mat& im, SegmentationResult* result) { From db9739a76e01001717d6ab2fe7ae13d2bc0c89bd Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 17 Feb 2023 11:03:12 +0800 Subject: [PATCH 38/41] [C API] Supplement C api for models (#1317) * add batch_predict and initialized api for c * add batch_predict and result str in C api * modify visualization function * add micro to add models batchify * refactor and add models for ppdet others * add base define * fix * refine predict api --- c_api/fastdeploy_capi/fd_type.h | 5 + c_api/fastdeploy_capi/types_internal.cc | 128 +- c_api/fastdeploy_capi/types_internal.h | 279 ++++- .../vision/classification/ppcls/model.cc | 71 +- .../vision/classification/ppcls/model.h | 30 +- .../vision/detection/ppdet/base_define.h | 119 ++ .../vision/detection/ppdet/model.cc | 252 +++- .../vision/detection/ppdet/model.h | 1053 ++++++++++++++++- c_api/fastdeploy_capi/vision/result.cc | 23 +- c_api/fastdeploy_capi/vision/result.h | 30 + c_api/fastdeploy_capi/vision/visualize.cc | 24 +- c_api/fastdeploy_capi/vision/visualize.h | 26 +- 12 files changed, 1942 insertions(+), 98 deletions(-) create mode 100644 c_api/fastdeploy_capi/vision/detection/ppdet/base_define.h diff --git a/c_api/fastdeploy_capi/fd_type.h b/c_api/fastdeploy_capi/fd_type.h index 4a3d8cadd..a7860e982 100644 --- a/c_api/fastdeploy_capi/fd_type.h +++ b/c_api/fastdeploy_capi/fd_type.h @@ -67,6 +67,11 @@ typedef struct FD_C_TwoDimArrayFloat { typedef void* FD_C_Mat; +typedef struct FD_C_OneDimMat { + size_t 
size; + FD_C_Mat* data; +} FD_C_OneDimMat; + #ifdef __cplusplus extern "C" { #endif diff --git a/c_api/fastdeploy_capi/types_internal.cc b/c_api/fastdeploy_capi/types_internal.cc index 807f5dd21..988b39af1 100644 --- a/c_api/fastdeploy_capi/types_internal.cc +++ b/c_api/fastdeploy_capi/types_internal.cc @@ -18,38 +18,108 @@ namespace fastdeploy { #ifdef ENABLE_VISION -std::unique_ptr& -FD_C_CheckAndConvertPaddleClasModelWrapper( - FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper) { - FDASSERT( - fd_c_paddleclas_model_wrapper != nullptr, - "The pointer of fd_c_paddleclas_model_wrapper shouldn't be nullptr."); - return fd_c_paddleclas_model_wrapper->paddleclas_model; -} +// results: -std::unique_ptr& -FD_C_CheckAndConvertPPYOLOEWrapper(FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper) { - FDASSERT(fd_c_ppyoloe_wrapper != nullptr, - "The pointer of fd_c_ppyoloe_wrapper shouldn't be nullptr."); - return fd_c_ppyoloe_wrapper->ppyoloe_model; -} +// ClassifyResult +DECL_AND_IMPLEMENT_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER( + ClassifyResult, fd_classify_result_wrapper, classify_result) +// DetectionResult +DECL_AND_IMPLEMENT_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER( + DetectionResult, fd_detection_result_wrapper, detection_result) -std::unique_ptr& -FD_C_CheckAndConvertClassifyResultWrapper( - FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) { - FDASSERT(fd_c_classify_result_wrapper != nullptr, - "The pointer of fd_c_classify_result_wrapper shouldn't be nullptr."); - return fd_c_classify_result_wrapper->classify_result; -} +// Models: + +// Classification + +// PaddleClasModel +DECL_AND_IMPLEMENT_CLASSIFICATION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PaddleClasModel, fd_paddleclas_model_wrapper, paddleclas_model) + +// detection models: + +// PPYOLOE + +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PPYOLOE, fd_ppyoloe_wrapper, ppyoloe_model) + +// PicoDet +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PicoDet, fd_picodet_wrapper, picodet_model) + +// PPYOLO +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PPYOLO, fd_ppyolo_wrapper, ppyolo_model) + +// YOLOv3 +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + YOLOv3, fd_yolov3_wrapper, yolov3_model) + +// PaddleYOLOX +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PaddleYOLOX, fd_paddleyolox_wrapper, paddleyolox_model) + +// FasterRCNN +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + FasterRCNN, fd_fasterrcnn_wrapper, fasterrcnn_model) + +// MaskRCNN +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + MaskRCNN, fd_maskrcnn_wrapper, maskrcnn_model) + +// SSD +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(SSD, + fd_ssd_wrapper, + ssd_model) + +// PaddleYOLOv5 +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PaddleYOLOv5, fd_paddleyolov5_wrapper, paddleyolov5_model) + +// PaddleYOLOv6 +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PaddleYOLOv6, fd_paddleyolov6_wrapper, paddleyolov6_model) + +// PaddleYOLOv7 +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PaddleYOLOv7, fd_paddleyolov7_wrapper, paddleyolov7_model) + +// PaddleYOLOv8 +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PaddleYOLOv8, fd_paddleyolov8_wrapper, paddleyolov8_model) + +// RTMDet +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + RTMDet, fd_rtmdet_wrapper, rtmdet_model) + +// CascadeRCNN 
+DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + CascadeRCNN, fd_cascadercnn_wrapper, cascadercnn_model) + +// PSSDet +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PSSDet, fd_pssdet_wrapper, pssdet_model) + +// RetinaNet +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + RetinaNet, fd_retinanet_wrapper, retinanet_model) + +// FCOS +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + FCOS, fd_fcos_wrapper, fcos_model) + +// TTFNet +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + TTFNet, fd_ttfnet_wrapper, ttfnet_model) + +// TOOD +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + TOOD, fd_tood_wrapper, tood_model) + +// GFL +DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(GFL, + fd_gfl_wrapper, + gfl_model) -std::unique_ptr& -FD_C_CheckAndConvertDetectionResultWrapper( - FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) { - FDASSERT( - fd_c_detection_result_wrapper != nullptr, - "The pointer of fd_c_detection_result_wrapper shouldn't be nullptr."); - return fd_c_detection_result_wrapper->detection_result; -} #endif std::unique_ptr& diff --git a/c_api/fastdeploy_capi/types_internal.h b/c_api/fastdeploy_capi/types_internal.h index f8a2cfbe9..c3238de51 100644 --- a/c_api/fastdeploy_capi/types_internal.h +++ b/c_api/fastdeploy_capi/types_internal.h @@ -23,39 +23,246 @@ #include "fastdeploy/vision/common/result.h" #include "fastdeploy/vision/detection/ppdet/model.h" -typedef struct FD_C_ClassifyResultWrapper { - std::unique_ptr classify_result; -} FD_C_ClassifyResultWrapper; +#define DEFINE_RESULT_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \ + std::unique_ptr varname; \ +} FD_C_##typename##Wrapper -typedef struct FD_C_DetectionResultWrapper { - std::unique_ptr detection_result; -} FD_C_DetectionResultWrapper; +#define DEFINE_CLASSIFICATION_MODEL_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \ + std::unique_ptr \ + varname; \ +} FD_C_##typename##Wrapper -typedef struct FD_C_PaddleClasModelWrapper { - std::unique_ptr - paddleclas_model; -} FD_C_PaddleClasModelWrapper; +#define DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(typename, varname) typedef struct FD_C_##typename##Wrapper { \ + std::unique_ptr varname; \ +} FD_C_##typename##Wrapper -typedef struct FD_C_PPYOLOEWrapper { - std::unique_ptr ppyoloe_model; -} FD_C_PPYOLOEWrapper; +// ------------- belows are wrapper struct define --------------------- // + +// Results: + +// ClassifyResult +DEFINE_RESULT_WRAPPER_STRUCT(ClassifyResult, classify_result); + +// DetectionResult +DEFINE_RESULT_WRAPPER_STRUCT(DetectionResult, detection_result); + + +// Models: + +// Classification + +// PaddleClasModel + +DEFINE_CLASSIFICATION_MODEL_WRAPPER_STRUCT(PaddleClasModel, paddleclas_model); + +// Detection + +// PPYOLOE +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(PPYOLOE, ppyoloe_model); + + +// PicoDet +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(PicoDet, picodet_model); + +// PPYOLO +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(PPYOLO, ppyolo_model); + +// YOLOv3 +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(YOLOv3, yolov3_model); + +// PaddleYOLOX +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(PaddleYOLOX, paddleyolox_model); + +// FasterRCNN +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(FasterRCNN, fasterrcnn_model); + +// MaskRCNN +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(MaskRCNN, maskrcnn_model); + +// SSD +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(SSD, ssd_model); + +// PaddleYOLOv5 
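+// As a concrete example, DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(PaddleYOLOv5, paddleyolov5_model)
+// below produces a struct of the same shape as the hand-written wrappers it replaces, roughly:
+//
+//   typedef struct FD_C_PaddleYOLOv5Wrapper {
+//     std::unique_ptr<fastdeploy::vision::detection::PaddleYOLOv5> paddleyolov5_model;
+//   } FD_C_PaddleYOLOv5Wrapper;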
+DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(PaddleYOLOv5, paddleyolov5_model); + +// PaddleYOLOv6 +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(PaddleYOLOv6, paddleyolov6_model); + +// PaddleYOLOv7 +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(PaddleYOLOv7, paddleyolov7_model); + +// PaddleYOLOv8 +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(PaddleYOLOv8, paddleyolov8_model); + +// RTMDet +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(RTMDet, rtmdet_model); + +// CascadeRCNN +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(CascadeRCNN, cascadercnn_model); + +// PSSDet +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(PSSDet, pssdet_model); + +// RetinaNet +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(RetinaNet, retinanet_model); + + +// FCOS +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(FCOS, fcos_model); + +// TTFNet +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(TTFNet, ttfnet_model); + +// TOOD +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(TOOD, tood_model); + +// GFL +DEFINE_DETECTION_MODEL_WRAPPER_STRUCT(GFL, gfl_model); + + +// ------------- belows are function declaration for get ptr from wrapper --------------------- // + +#define DECLARE_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER(typename, varname) std::unique_ptr& \ +FD_C_CheckAndConvert##typename##Wrapper( \ + FD_C_##typename##Wrapper* varname) + +#define DECLARE_CLASSIFICATION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(typename, varname) std::unique_ptr& \ +FD_C_CheckAndConvert##typename##Wrapper( \ + FD_C_##typename##Wrapper* varname) + +#define DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(typename, varname) std::unique_ptr& \ +FD_C_CheckAndConvert##typename##Wrapper( \ + FD_C_##typename##Wrapper* varname) namespace fastdeploy { -std::unique_ptr& -FD_C_CheckAndConvertClassifyResultWrapper( - FD_C_ClassifyResultWrapper* fd_classify_result_wrapper); -std::unique_ptr& -FD_C_CheckAndConvertDetectionResultWrapper( - FD_C_DetectionResultWrapper* fd_detection_result_wrapper); -std::unique_ptr& -FD_C_CheckAndConvertPaddleClasModelWrapper( - FD_C_PaddleClasModelWrapper* fd_paddleclas_model_wrapper); -std::unique_ptr& -FD_C_CheckAndConvertPPYOLOEWrapper(FD_C_PPYOLOEWrapper* fd_ppyoloe_wrapper); + +// results: + +// ClassifyResult +DECLARE_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER(ClassifyResult, + fd_classify_result_wrapper); +// DetectionResult +DECLARE_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER(DetectionResult, + fd_detection_result_wrapper); + +// Models: + +// Classification + +// PaddleClasModel + +DECLARE_CLASSIFICATION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER( + PaddleClasModel, fd_paddleclas_model_wrapper); + + +// detection models: + +// PPYOLOE + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPYOLOE, + fd_ppyoloe_wrapper); + +// PicoDet + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PicoDet, + fd_picodet_wrapper); + +// PPYOLO + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PPYOLO, + fd_ppyolo_wrapper); + +// YOLOv3 + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(YOLOv3, + fd_yolov3_wrapper); + +// PaddleYOLOX + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PaddleYOLOX, + fd_paddleyolox_wrapper); + +// FasterRCNN + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(FasterRCNN, + fd_fasterrcnn_wrapper); + +// MaskRCNN + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(MaskRCNN, + fd_maskrcnn_wrapper); + +// SSD + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(SSD, + fd_ssd_wrapper); + +// PaddleYOLOv5 + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PaddleYOLOv5, + fd_paddleyolov5_wrapper); + +// PaddleYOLOv6 + 
+DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PaddleYOLOv6, + fd_paddleyolov6_wrapper); + +// PaddleYOLOv7 + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PaddleYOLOv7, + fd_paddleyolov7_wrapper); + +// PaddleYOLOv8 + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PaddleYOLOv8, + fd_paddleyolov8_wrapper); + +// RTMDet + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(RTMDet, + fd_rtmdet_wrapper); + +// CascadeRCNN + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(CascadeRCNN, + fd_cascadercnn_wrapper); + +// PSSDet + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(PSSDet, + fd_pssdet_wrapper); + +// RetinaNet + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(RetinaNet, + fd_retinanet_wrapper); + +// FCOS + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(FCOS, + fd_fcos_wrapper); + +// TTFNet + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(TTFNet, + fd_ttfnet_wrapper); + +// TOOD + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(TOOD, + fd_tood_wrapper); + +// GFL + +DECLARE_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(GFL, + fd_gfl_wrapper); + } // namespace fastdeploy #endif + + typedef struct FD_C_RuntimeOptionWrapper { std::unique_ptr runtime_option; } FD_C_RuntimeOptionWrapper; @@ -68,3 +275,27 @@ FD_C_CheckAndConvertRuntimeOptionWrapper( #define CHECK_AND_CONVERT_FD_TYPE(TYPENAME, variable_name) \ fastdeploy::FD_C_CheckAndConvert##TYPENAME(variable_name) + +#define DECL_AND_IMPLEMENT_RESULT_FUNC_FOR_GET_PTR_FROM_WRAPPER(typename, var_wrapper_name, var_ptr_name) std::unique_ptr& \ +FD_C_CheckAndConvert##typename##Wrapper( \ + FD_C_##typename##Wrapper* var_wrapper_name) { \ + FDASSERT(var_wrapper_name != nullptr, \ + "The pointer of " #var_wrapper_name " shouldn't be nullptr."); \ + return var_wrapper_name->var_ptr_name; \ +} + +#define DECL_AND_IMPLEMENT_CLASSIFICATION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(typename, var_wrapper_name, var_ptr_name) std::unique_ptr& \ +FD_C_CheckAndConvert##typename##Wrapper( \ + FD_C_##typename##Wrapper* var_wrapper_name) { \ + FDASSERT(var_wrapper_name != nullptr, \ + "The pointer of " #var_wrapper_name " shouldn't be nullptr."); \ + return var_wrapper_name->var_ptr_name; \ +} + +#define DECL_AND_IMPLEMENT_DETECTION_MODEL_FUNC_FOR_GET_PTR_FROM_WRAPPER(typename, var_wrapper_name, var_ptr_name) std::unique_ptr& \ +FD_C_CheckAndConvert##typename##Wrapper( \ + FD_C_##typename##Wrapper* var_wrapper_name) { \ + FDASSERT(var_wrapper_name != nullptr, \ + "The pointer of " #var_wrapper_name " shouldn't be nullptr."); \ + return var_wrapper_name->var_ptr_name; \ +} diff --git a/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc b/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc index ad0028612..f88f59471 100644 --- a/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc +++ b/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc @@ -38,21 +38,82 @@ FD_C_PaddleClasModelWrapper* FD_C_CreatePaddleClasModelWrapper( } void FD_C_DestroyPaddleClasModelWrapper( - __fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper) { + FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper) { delete fd_c_paddleclas_model_wrapper; } FD_C_Bool FD_C_PaddleClasModelWrapperPredict( - __fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper, - FD_C_Mat img, FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) { + FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper, FD_C_Mat img, + FD_C_ClassifyResult* fd_c_classify_result) { cv::Mat* im = 
reinterpret_cast(img); auto& paddleclas_model = CHECK_AND_CONVERT_FD_TYPE( PaddleClasModelWrapper, fd_c_paddleclas_model_wrapper); + FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper = + FD_C_CreateClassifyResultWrapper(); auto& classify_result = CHECK_AND_CONVERT_FD_TYPE( ClassifyResultWrapper, fd_c_classify_result_wrapper); - return paddleclas_model->Predict(im, classify_result.get()); + + bool successful = paddleclas_model->Predict(im, classify_result.get()); + if (successful) { + FD_C_ClassifyResult* res = + FD_C_ClassifyResultWrapperGetData(fd_c_classify_result_wrapper); + *fd_c_classify_result = *res; + } + return successful; +} + +FD_C_Bool FD_C_PaddleClasModelWrapperInitialized( + FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper) { + auto& paddleclas_model = CHECK_AND_CONVERT_FD_TYPE( + PaddleClasModelWrapper, fd_c_paddleclas_model_wrapper); + return paddleclas_model->Initialized(); +} + +FD_C_ClassifyResult* FD_C_ClassifyResultToC( + fastdeploy::vision::ClassifyResult* classify_result) { + // Internal use, transfer fastdeploy::vision::ClassifyResult to + // FD_C_ClassifyResult + FD_C_ClassifyResult* fd_c_classify_result_data = new FD_C_ClassifyResult(); + // copy label_ids + fd_c_classify_result_data->label_ids.size = classify_result->label_ids.size(); + fd_c_classify_result_data->label_ids.data = + new int32_t[fd_c_classify_result_data->label_ids.size]; + memcpy(fd_c_classify_result_data->label_ids.data, + classify_result->label_ids.data(), + sizeof(int32_t) * fd_c_classify_result_data->label_ids.size); + // copy scores + fd_c_classify_result_data->scores.size = classify_result->scores.size(); + fd_c_classify_result_data->scores.data = + new float[fd_c_classify_result_data->scores.size]; + memcpy(fd_c_classify_result_data->scores.data, classify_result->scores.data(), + sizeof(float) * fd_c_classify_result_data->scores.size); + fd_c_classify_result_data->type = + static_cast(classify_result->type); + return fd_c_classify_result_data; +} + +FD_C_Bool FD_C_PaddleClasModelWrapperBatchPredict( + FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper, + FD_C_OneDimMat imgs, FD_C_OneDimClassifyResult* results) { + std::vector imgs_vec; + std::vector results_out; + for (int i = 0; i < imgs.size; i++) { + imgs_vec.push_back(*(reinterpret_cast(imgs.data[i]))); + } + auto& paddleclas_model = CHECK_AND_CONVERT_FD_TYPE( + PaddleClasModelWrapper, fd_c_paddleclas_model_wrapper); + bool successful = paddleclas_model->BatchPredict(imgs_vec, &results_out); + if (successful) { + // copy results back to FD_C_OneDimClassifyResult + results->size = results_out.size(); + results->data = new FD_C_ClassifyResult[results->size]; + for (int i = 0; i < results_out.size(); i++) { + results->data[i] = *FD_C_ClassifyResultToC(&results_out[i]); + } + } + return successful; } #ifdef __cplusplus } -#endif \ No newline at end of file +#endif diff --git a/c_api/fastdeploy_capi/vision/classification/ppcls/model.h b/c_api/fastdeploy_capi/vision/classification/ppcls/model.h index db117e605..c523e76ac 100644 --- a/c_api/fastdeploy_capi/vision/classification/ppcls/model.h +++ b/c_api/fastdeploy_capi/vision/classification/ppcls/model.h @@ -54,12 +54,36 @@ FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyPaddleClasModelWrapper( * * \param[in] fd_c_paddleclas_model_wrapper pointer to FD_C_PaddleClasModelWrapper object * \param[in] img pointer to cv::Mat image - * \param[in] fd_c_classify_result_wrapper pointer to FD_C_PaddleClasModelWrapper object, which stores the result. 
+ * \param[in] fd_c_classify_result pointer to FD_C_ClassifyResult object, which stores the result. */ FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PaddleClasModelWrapperPredict( - __fd_take FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper, - FD_C_Mat img, FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper); + __fd_keep FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper, + FD_C_Mat img, FD_C_ClassifyResult* fd_c_classify_result_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_paddleclas_model_wrapper pointer to FD_C_PaddleClasModelWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PaddleClasModelWrapperInitialized( + __fd_keep FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper); + + +/** \brief Predict the classification results for a batch of input images + * + * \param[in] fd_c_paddleclas_model_wrapper pointer to FD_C_PaddleClasModelWrapper object + * \param[in] imgs The input image list, each element comes from cv::imread() + * \param[in] results The output classification result list + * \return true if the prediction successed, otherwise false + */ +FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PaddleClasModelWrapperBatchPredict( + __fd_keep FD_C_PaddleClasModelWrapper* fd_c_paddleclas_model_wrapper, + FD_C_OneDimMat imgs, + FD_C_OneDimClassifyResult* results); + #ifdef __cplusplus } // extern "C" diff --git a/c_api/fastdeploy_capi/vision/detection/ppdet/base_define.h b/c_api/fastdeploy_capi/vision/detection/ppdet/base_define.h new file mode 100644 index 000000000..13a647f3d --- /dev/null +++ b/c_api/fastdeploy_capi/vision/detection/ppdet/base_define.h @@ -0,0 +1,119 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
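+
+// base_define.h centralizes the boilerplate shared by all PaddleDetection C API wrappers.
+// A model is wired up by invoking the DECLARE_* macros in model.h and the matching
+// DECLARE_AND_IMPLEMENT_* macros in model.cc. For example, PicoDet uses (sketch):
+//
+//   // model.h
+//   DECLARE_CREATE_WRAPPER_FUNCTION(PicoDet);
+//   DECLARE_DESTROY_WRAPPER_FUNCTION(PicoDet, fd_c_picodet_wrapper);
+//   DECLARE_PREDICT_FUNCTION(PicoDet, fd_c_picodet_wrapper);
+//   DECLARE_INITIALIZED_FUNCTION(PicoDet, fd_c_picodet_wrapper);
+//   DECLARE_BATCH_PREDICT_FUNCTION(PicoDet, fd_c_picodet_wrapper);
+//
+//   // model.cc
+//   DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(PicoDet, picodet_model)
+//   DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(PicoDet, fd_picodet_wrapper)
+//   DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(PicoDet, fd_picodet_wrapper)
+//   DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(PicoDet, fd_picodet_wrapper)
+//   DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(PicoDet, fd_picodet_wrapper)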
+ +#pragma once + +#define DECLARE_CREATE_WRAPPER_FUNCTION(model_type) FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_##model_type##Wrapper* \ +FD_C_Creates##model_type##Wrapper( \ + const char* model_file, const char* params_file, const char* config_file, \ + FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, \ + const FD_C_ModelFormat model_format) + + +#define DECLARE_DESTROY_WRAPPER_FUNCTION(model_type, wrapper_var_name) FASTDEPLOY_CAPI_EXPORT extern void \ +FD_C_Destroy##model_type##Wrapper(__fd_take FD_C_##model_type##Wrapper* wrapper_var_name); + +#define DECLARE_PREDICT_FUNCTION(model_type, wrapper_var_name) FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_##model_type##WrapperPredict( \ + __fd_take FD_C_##model_type##Wrapper* wrapper_var_name, FD_C_Mat img, \ + FD_C_DetectionResult* fd_c_detection_result) + +#define DECLARE_INITIALIZED_FUNCTION(model_type, wrapper_var_name) FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_##model_type##WrapperInitialized( \ + __fd_keep FD_C_##model_type##Wrapper* wrapper_var_name) + + +#define DECLARE_BATCH_PREDICT_FUNCTION(model_type, wrapper_var_name) FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_##model_type##WrapperBatchPredict( \ + __fd_keep FD_C_##model_type##Wrapper* wrapper_var_name, \ + FD_C_OneDimMat imgs, \ + FD_C_OneDimDetectionResult* results) + +#define IMPLEMENT_CREATE_WRAPPER_FUNCTION(model_type, var_name) \ + auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, \ + fd_c_runtime_option_wrapper); \ + FD_C_##model_type##Wrapper* fd_c_##model_type##_wrapper = new FD_C_##model_type##Wrapper(); \ + fd_c_##model_type##_wrapper->var_name = \ + std::unique_ptr( \ + new fastdeploy::vision::detection::model_type( \ + std::string(model_file), std::string(params_file), \ + std::string(config_file), *runtime_option, \ + static_cast(model_format))); \ + return fd_c_##model_type##_wrapper + +#define IMPLEMENT_DESTROY_WRAPPER_FUNCTION(model_type, wrapper_var_name) delete wrapper_var_name + +#define IMPLEMENT_PREDICT_FUNCTION(model_type, wrapper_var_name) \ + cv::Mat* im = reinterpret_cast(img); \ + auto& model = \ + CHECK_AND_CONVERT_FD_TYPE(model_type##Wrapper, wrapper_var_name); \ + FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper = \ + FD_C_CreateDetectionResultWrapper(); \ + auto& detection_result = CHECK_AND_CONVERT_FD_TYPE( \ + DetectionResultWrapper, fd_c_detection_result_wrapper); \ + bool successful = model->Predict(im, detection_result.get()); \ + if (successful) { \ + FD_C_DetectionResult* res = \ + FD_C_DetectionResultWrapperGetData(fd_c_detection_result_wrapper); \ + *fd_c_detection_result = *res; \ + } \ + return successful + +#define IMPLEMENT_INITIALIZED_FUNCTION(model_type, wrapper_var_name) auto& model = \ + CHECK_AND_CONVERT_FD_TYPE(model_type##Wrapper, wrapper_var_name); \ +return model->Initialized(); + +#define IMPLEMENT_BATCH_PREDICT_FUNCTION(model_type, wrapper_var_name) std::vector imgs_vec; \ + std::vector results_out; \ + for (int i = 0; i < imgs.size; i++) { \ + imgs_vec.push_back(*(reinterpret_cast(imgs.data[i]))); \ + } \ + auto& model = \ + CHECK_AND_CONVERT_FD_TYPE(model_type##Wrapper, wrapper_var_name); \ + bool successful = model->BatchPredict(imgs_vec, &results_out); \ + if (successful) { \ + results->size = results_out.size(); \ + results->data = new FD_C_DetectionResult[results->size]; \ + for (int i = 0; i < results_out.size(); i++) { \ + results->data[i] = *FD_C_DetectionResultToC(&results_out[i]); \ + } \ + } \ + return successful; + +#define 
DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(model_type, var_name) FD_C_##model_type##Wrapper* FD_C_Creates##model_type##Wrapper(\ + const char* model_file, const char* params_file, const char* config_file, \ + FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, \ + const FD_C_ModelFormat model_format) { \ + IMPLEMENT_CREATE_WRAPPER_FUNCTION(model_type, var_name); \ +} + +#define DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(model_type, wrapper_var_name) void FD_C_Destroy##model_type##Wrapper( \ + __fd_take FD_C_##model_type##Wrapper* wrapper_var_name) { \ + IMPLEMENT_DESTROY_WRAPPER_FUNCTION(model_type, wrapper_var_name); \ +} + + +#define DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(model_type, wrapper_var_name) FD_C_Bool FD_C_##model_type##WrapperPredict( \ + FD_C_##model_type##Wrapper* wrapper_var_name, FD_C_Mat img, \ + FD_C_DetectionResult* fd_c_detection_result) { \ + IMPLEMENT_PREDICT_FUNCTION(model_type, wrapper_var_name); \ +} + +#define DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(model_type, wrapper_var_name) FD_C_Bool FD_C_##model_type##WrapperInitialized( \ + FD_C_##model_type##Wrapper* wrapper_var_name) { \ + IMPLEMENT_INITIALIZED_FUNCTION(model_type, wrapper_var_name); \ +} + +#define DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(model_type, wrapper_var_name) FD_C_Bool FD_C_##model_type##WrapperBatchPredict( \ + FD_C_##model_type##Wrapper* wrapper_var_name, FD_C_OneDimMat imgs, \ + FD_C_OneDimDetectionResult* results) { \ + IMPLEMENT_BATCH_PREDICT_FUNCTION(model_type, wrapper_var_name); \ +} diff --git a/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc b/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc index e7055185f..7bfbd67f0 100644 --- a/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc +++ b/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc @@ -21,38 +21,248 @@ extern "C" { #endif +// PPYOLOE + FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper( const char* model_file, const char* params_file, const char* config_file, FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, const FD_C_ModelFormat model_format) { - auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, - fd_c_runtime_option_wrapper); - FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper = new FD_C_PPYOLOEWrapper(); - fd_c_ppyoloe_wrapper->ppyoloe_model = - std::unique_ptr( - new fastdeploy::vision::detection::PPYOLOE( - std::string(model_file), std::string(params_file), - std::string(config_file), *runtime_option, - static_cast(model_format))); - return fd_c_ppyoloe_wrapper; + IMPLEMENT_CREATE_WRAPPER_FUNCTION(PPYOLOE, ppyoloe_model); } void FD_C_DestroyPPYOLOEWrapper( - __fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper) { - delete fd_c_ppyoloe_wrapper; + __fd_take FD_C_PPYOLOEWrapper* fd_ppyoloe_wrapper) { + IMPLEMENT_DESTROY_WRAPPER_FUNCTION(PPYOLOE, fd_ppyoloe_wrapper); } FD_C_Bool FD_C_PPYOLOEWrapperPredict( - FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img, - FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) { - cv::Mat* im = reinterpret_cast(img); - auto& ppyoloe_model = - CHECK_AND_CONVERT_FD_TYPE(PPYOLOEWrapper, fd_c_ppyoloe_wrapper); - auto& detection_result = CHECK_AND_CONVERT_FD_TYPE( - DetectionResultWrapper, fd_c_detection_result_wrapper); - return ppyoloe_model->Predict(im, detection_result.get()); + FD_C_PPYOLOEWrapper* fd_ppyoloe_wrapper, FD_C_Mat img, + FD_C_DetectionResult* fd_c_detection_result) { + IMPLEMENT_PREDICT_FUNCTION(PPYOLOE, fd_ppyoloe_wrapper); } +FD_C_Bool FD_C_PPYOLOEWrapperInitialized( + FD_C_PPYOLOEWrapper* fd_ppyoloe_wrapper) { + 
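+  // IMPLEMENT_INITIALIZED_FUNCTION (see base_define.h) expands here to roughly:
+  //   auto& model = CHECK_AND_CONVERT_FD_TYPE(PPYOLOEWrapper, fd_ppyoloe_wrapper);
+  //   return model->Initialized();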
IMPLEMENT_INITIALIZED_FUNCTION(PPYOLOE, fd_ppyoloe_wrapper); +} + +FD_C_DetectionResult* FD_C_DetectionResultToC( + fastdeploy::vision::DetectionResult* detection_result) { + // Internal use, transfer fastdeploy::vision::DetectionResult to + // FD_C_DetectionResult + FD_C_DetectionResult* fd_c_detection_result = new FD_C_DetectionResult(); + // copy boxes + const int boxes_coordinate_dim = 4; + fd_c_detection_result->boxes.size = detection_result->boxes.size(); + fd_c_detection_result->boxes.data = + new FD_C_OneDimArrayFloat[fd_c_detection_result->boxes.size]; + for (size_t i = 0; i < detection_result->boxes.size(); i++) { + fd_c_detection_result->boxes.data[i].size = boxes_coordinate_dim; + fd_c_detection_result->boxes.data[i].data = new float[boxes_coordinate_dim]; + for (size_t j = 0; j < boxes_coordinate_dim; j++) { + fd_c_detection_result->boxes.data[i].data[j] = + detection_result->boxes[i][j]; + } + } + // copy scores + fd_c_detection_result->scores.size = detection_result->scores.size(); + fd_c_detection_result->scores.data = + new float[fd_c_detection_result->scores.size]; + memcpy(fd_c_detection_result->scores.data, detection_result->scores.data(), + sizeof(float) * fd_c_detection_result->scores.size); + // copy label_ids + fd_c_detection_result->label_ids.size = detection_result->label_ids.size(); + fd_c_detection_result->label_ids.data = + new int32_t[fd_c_detection_result->label_ids.size]; + memcpy(fd_c_detection_result->label_ids.data, + detection_result->label_ids.data(), + sizeof(int32_t) * fd_c_detection_result->label_ids.size); + // copy masks + fd_c_detection_result->masks.size = detection_result->masks.size(); + fd_c_detection_result->masks.data = + new FD_C_Mask[fd_c_detection_result->masks.size]; + for (size_t i = 0; i < detection_result->masks.size(); i++) { + // copy data in mask + fd_c_detection_result->masks.data[i].data.size = + detection_result->masks[i].data.size(); + fd_c_detection_result->masks.data[i].data.data = + new uint8_t[detection_result->masks[i].data.size()]; + memcpy(fd_c_detection_result->masks.data[i].data.data, + detection_result->masks[i].data.data(), + sizeof(uint8_t) * detection_result->masks[i].data.size()); + // copy shape in mask + fd_c_detection_result->masks.data[i].shape.size = + detection_result->masks[i].shape.size(); + fd_c_detection_result->masks.data[i].shape.data = + new int64_t[detection_result->masks[i].shape.size()]; + memcpy(fd_c_detection_result->masks.data[i].shape.data, + detection_result->masks[i].shape.data(), + sizeof(int64_t) * detection_result->masks[i].shape.size()); + fd_c_detection_result->masks.data[i].type = + static_cast(detection_result->masks[i].type); + } + fd_c_detection_result->contain_masks = detection_result->contain_masks; + fd_c_detection_result->type = + static_cast(detection_result->type); + return fd_c_detection_result; +} + +FD_C_Bool FD_C_PPYOLOEWrapperBatchPredict( + FD_C_PPYOLOEWrapper* fd_ppyoloe_wrapper, FD_C_OneDimMat imgs, + FD_C_OneDimDetectionResult* results) { + IMPLEMENT_BATCH_PREDICT_FUNCTION(PPYOLOE, fd_ppyoloe_wrapper); +} + +// PicoDet +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(PicoDet, picodet_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(PicoDet, fd_picodet_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(PicoDet, fd_picodet_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(PicoDet, fd_picodet_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(PicoDet, fd_picodet_wrapper) + +// PPYOLO +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(PPYOLO, ppyolo_model) 
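For context, below is a minimal usage sketch of the PPYOLOE C interface generated by these macros. It is illustrative only: FD_C_CreateRuntimeOptionWrapper, FD_C_DestroyRuntimeOptionWrapper, FD_C_Imread and the FD_C_ModelFormat_PADDLE enum value are assumed to be provided elsewhere in the C API (they are not part of this patch), and error handling is omitted.

    #include "fastdeploy_capi/vision/detection/ppdet/model.h"

    int main() {
      /* Assumed helpers from the existing C API: runtime option creation and image loading. */
      FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
      FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
          "ppyoloe/model.pdmodel", "ppyoloe/model.pdiparams",
          "ppyoloe/infer_cfg.yml", option, FD_C_ModelFormat_PADDLE);
      if (!FD_C_PPYOLOEWrapperInitialized(model)) {
        return -1; /* initialization failed */
      }

      /* Single-image prediction. */
      FD_C_Mat img = FD_C_Imread("test.jpg");
      FD_C_DetectionResult result;
      FD_C_PPYOLOEWrapperPredict(model, img, &result);

      /* Batch prediction using the new FD_C_OneDimMat / FD_C_OneDimDetectionResult types. */
      FD_C_Mat mats[2] = {FD_C_Imread("a.jpg"), FD_C_Imread("b.jpg")};
      FD_C_OneDimMat imgs = {2, mats};
      FD_C_OneDimDetectionResult results;
      FD_C_PPYOLOEWrapperBatchPredict(model, imgs, &results);

      FD_C_DestroyPPYOLOEWrapper(model);
      FD_C_DestroyRuntimeOptionWrapper(option);
      return 0;
    }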
+DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(PPYOLO, fd_ppyolo_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(PPYOLO, fd_ppyolo_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(PPYOLO, fd_ppyolo_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(PPYOLO, fd_ppyolo_wrapper) + +// YOLOv3 +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(YOLOv3, yolov3_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(YOLOv3, fd_yolov3_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(YOLOv3, fd_yolov3_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(YOLOv3, fd_yolov3_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(YOLOv3, fd_yolov3_wrapper) + +// PaddleYOLOX +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(PaddleYOLOX, paddleyolox_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(PaddleYOLOX, + fd_paddleyolox_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(PaddleYOLOX, fd_paddleyolox_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(PaddleYOLOX, fd_paddleyolox_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(PaddleYOLOX, + fd_paddleyolox_wrapper) + +// FasterRCNN +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(FasterRCNN, fasterrcnn_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(FasterRCNN, + fd_fasterrcnn_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(FasterRCNN, fd_fasterrcnn_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(FasterRCNN, fd_fasterrcnn_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(FasterRCNN, fd_fasterrcnn_wrapper) + +// MaskRCNN +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(MaskRCNN, maskrcnn_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(MaskRCNN, fd_maskrcnn_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(MaskRCNN, fd_maskrcnn_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(MaskRCNN, fd_maskrcnn_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(MaskRCNN, fd_maskrcnn_wrapper) + +// SSD +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(SSD, ssd_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(SSD, fd_ssd_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(SSD, fd_ssd_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(SSD, fd_ssd_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(SSD, fd_ssd_wrapper) + +// PaddleYOLOv5 +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(PaddleYOLOv5, paddleyolov5_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(PaddleYOLOv5, + fd_paddleyolov5_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(PaddleYOLOv5, fd_paddleyolov5_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(PaddleYOLOv5, + fd_paddleyolov5_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(PaddleYOLOv5, + fd_paddleyolov5_wrapper) + +// PaddleYOLOv6 +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(PaddleYOLOv6, paddleyolov6_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(PaddleYOLOv6, + fd_paddleyolov6_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(PaddleYOLOv6, fd_paddleyolov6_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(PaddleYOLOv6, + fd_paddleyolov6_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(PaddleYOLOv6, + fd_paddleyolov6_wrapper) + +// PaddleYOLOv7 +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(PaddleYOLOv7, paddleyolov7_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(PaddleYOLOv7, + fd_paddleyolov7_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(PaddleYOLOv7, fd_paddleyolov7_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(PaddleYOLOv7, + fd_paddleyolov7_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(PaddleYOLOv7, + fd_paddleyolov7_wrapper) + +// 
PaddleYOLOv8 +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(PaddleYOLOv8, paddleyolov8_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(PaddleYOLOv8, + fd_paddleyolov8_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(PaddleYOLOv8, fd_paddleyolov8_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(PaddleYOLOv8, + fd_paddleyolov8_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(PaddleYOLOv8, + fd_paddleyolov8_wrapper) + +// RTMDet +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(RTMDet, rtmdet_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(RTMDet, fd_rtmdet_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(RTMDet, fd_rtmdet_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(RTMDet, fd_rtmdet_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(RTMDet, fd_rtmdet_wrapper) + +// CascadeRCNN +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(CascadeRCNN, cascadercnn_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(CascadeRCNN, + fd_cascadercnn_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(CascadeRCNN, fd_cascadercnn_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(CascadeRCNN, fd_cascadercnn_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(CascadeRCNN, + fd_cascadercnn_wrapper) + +// PSSDet +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(PSSDet, pssdet_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(PSSDet, fd_pssdet_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(PSSDet, fd_pssdet_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(PSSDet, fd_pssdet_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(PSSDet, fd_pssdet_wrapper) + +// RetinaNet +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(RetinaNet, retinanet_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(RetinaNet, fd_retinanet_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(RetinaNet, fd_retinanet_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(RetinaNet, fd_retinanet_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(RetinaNet, fd_retinanet_wrapper) + +// FCOS +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(FCOS, fcos_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(FCOS, fd_fcos_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(FCOS, fd_fcos_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(FCOS, fd_fcos_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(FCOS, fd_fcos_wrapper) + +// TTFNet +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(TTFNet, ttfnet_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(TTFNet, fd_ttfnet_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(TTFNet, fd_ttfnet_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(TTFNet, fd_ttfnet_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(TTFNet, fd_ttfnet_wrapper) + +// TOOD +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(TOOD, tood_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(TOOD, fd_tood_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(TOOD, fd_tood_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(TOOD, fd_tood_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(TOOD, fd_tood_wrapper) + +// GFL +DECLARE_AND_IMPLEMENT_CREATE_WRAPPER_FUNCTION(GFL, gfl_model) +DECLARE_AND_IMPLEMENT_DESTROY_WRAPPER_FUNCTION(GFL, fd_gfl_wrapper) +DECLARE_AND_IMPLEMENT_PREDICT_FUNCTION(GFL, fd_gfl_wrapper) +DECLARE_AND_IMPLEMENT_INITIALIZED_FUNCTION(GFL, fd_gfl_wrapper) +DECLARE_AND_IMPLEMENT_BATCH_PREDICT_FUNCTION(GFL, fd_gfl_wrapper) + #ifdef __cplusplus } -#endif \ No newline at end of file +#endif diff --git a/c_api/fastdeploy_capi/vision/detection/ppdet/model.h 
b/c_api/fastdeploy_capi/vision/detection/ppdet/model.h index 6dce7a64e..71b74bcdf 100644 --- a/c_api/fastdeploy_capi/vision/detection/ppdet/model.h +++ b/c_api/fastdeploy_capi/vision/detection/ppdet/model.h @@ -18,14 +18,18 @@ #include "fastdeploy_capi/fd_type.h" #include "fastdeploy_capi/runtime_option.h" #include "fastdeploy_capi/vision/result.h" +#include "fastdeploy_capi/vision/detection/ppdet/base_define.h" -typedef struct FD_C_PPYOLOEWrapper FD_C_PPYOLOEWrapper; typedef struct FD_C_RuntimeOptionWrapper FD_C_RuntimeOptionWrapper; #ifdef __cplusplus extern "C" { #endif +// PPYOLOE + +typedef struct FD_C_PPYOLOEWrapper FD_C_PPYOLOEWrapper; + /** \brief Create a new FD_C_PPYOLOEWrapper object * * \param[in] model_file Path of model file, e.g resnet/model.pdmodel @@ -37,19 +41,14 @@ extern "C" { * \return Return a pointer to FD_C_PPYOLOEWrapper object */ -FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_PPYOLOEWrapper* -FD_C_CreatesPPYOLOEWrapper( - const char* model_file, const char* params_file, const char* config_file, - FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, - const FD_C_ModelFormat model_format); +DECLARE_CREATE_WRAPPER_FUNCTION(PPYOLOE); /** \brief Destroy a FD_C_PPYOLOEWrapper object * * \param[in] fd_c_ppyoloe_wrapper pointer to FD_C_PPYOLOEWrapper object */ -FASTDEPLOY_CAPI_EXPORT extern void -FD_C_DestroyPPYOLOEWrapper(__fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper); +DECLARE_DESTROY_WRAPPER_FUNCTION(PPYOLOE, fd_c_ppyoloe_wrapper); /** \brief Predict the detection result for an input image * @@ -58,9 +57,1041 @@ FD_C_DestroyPPYOLOEWrapper(__fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper); * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. */ -FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_PPYOLOEWrapperPredict( - __fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img, - FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper); +DECLARE_PREDICT_FUNCTION(PPYOLOE, fd_c_ppyoloe_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_ppyoloe_wrapper pointer to FD_C_PPYOLOEWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(PPYOLOE, fd_c_ppyoloe_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_ppyoloe_wrapper pointer to FD_C_PPYOLOEWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(PPYOLOE, fd_c_ppyoloe_wrapper); + +// PicoDet + +typedef struct FD_C_PicoDetWrapper FD_C_PicoDetWrapper; + +/** \brief Create a new FD_C_PicoDetWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_PicoDetWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(PicoDet); + +/** \brief Destroy a 
FD_C_PicoDetWrapper object + * + * \param[in] fd_c_picodet_wrapper pointer to FD_C_PicoDetWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(PicoDet, fd_c_picodet_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_picodet_wrapper pointer to FD_C_PicoDetWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. + */ + +DECLARE_PREDICT_FUNCTION(PicoDet, fd_c_picodet_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_picodet_wrapper pointer to FD_C_PicoDetWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(PicoDet, fd_c_picodet_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_picodet_wrapper pointer to FD_C_PicoDetWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(PicoDet, fd_c_picodet_wrapper); + + +// PPYOLO + +typedef struct FD_C_PPYOLOWrapper FD_C_PPYOLOWrapper; + +/** \brief Create a new FD_C_PPYOLOWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_PPYOLOWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(PPYOLO); + +/** \brief Destroy a FD_C_PPYOLOWrapper object + * + * \param[in] fd_c_ppyolo_wrapper pointer to FD_C_PPYOLOWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(PPYOLO, fd_c_ppyolo_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_ppyolo_wrapper pointer to FD_C_PPYOLOWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(PPYOLO, fd_c_ppyolo_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_ppyolo_wrapper pointer to FD_C_PPYOLOWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(PPYOLO, fd_c_ppyolo_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_ppyolo_wrapper pointer to FD_C_PPYOLOWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(PPYOLO, fd_c_ppyolo_wrapper); + +// YOLOv3 + +typedef struct FD_C_YOLOv3Wrapper FD_C_YOLOv3Wrapper; + +/** \brief Create a new FD_C_YOLOv3Wrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_YOLOv3Wrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(YOLOv3); + +/** \brief Destroy a FD_C_YOLOv3Wrapper object + * + * \param[in] fd_c_yolov3_wrapper pointer to FD_C_YOLOv3Wrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(YOLOv3, fd_c_yolov3_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_yolov3_wrapper pointer to FD_C_YOLOv3Wrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(YOLOv3, fd_c_yolov3_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_yolov3_wrapper pointer to FD_C_YOLOv3Wrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(YOLOv3, fd_c_yolov3_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_yolov3_wrapper pointer to FD_C_YOLOv3Wrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(YOLOv3, fd_c_yolov3_wrapper); + +// PaddleYOLOX + +typedef struct FD_C_PaddleYOLOXWrapper FD_C_PaddleYOLOXWrapper; + +/** \brief Create a new FD_C_PaddleYOLOXWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_PaddleYOLOXWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(PaddleYOLOX); + +/** \brief Destroy a FD_C_PaddleYOLOXWrapper object + * + * \param[in] fd_c_paddleyolox_wrapper pointer to FD_C_PaddleYOLOXWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(PaddleYOLOX, fd_c_paddleyolox_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_paddleyolox_wrapper pointer to FD_C_PaddleYOLOXWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(PaddleYOLOX, fd_c_paddleyolox_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_paddleyolox_wrapper pointer to FD_C_PaddleYOLOXWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(PaddleYOLOX, fd_c_paddleyolox_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_paddleyolox_wrapper pointer to FD_C_PaddleYOLOXWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(PaddleYOLOX, fd_c_paddleyolox_wrapper); + +// FasterRCNN + +typedef struct FD_C_FasterRCNNWrapper FD_C_FasterRCNNWrapper; + +/** \brief Create a new FD_C_FasterRCNNWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_FasterRCNNWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(FasterRCNN); + +/** \brief Destroy a FD_C_FasterRCNNWrapper object + * + * \param[in] fd_c_fasterrcnn_wrapper pointer to FD_C_FasterRCNNWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(FasterRCNN, fd_c_fasterrcnn_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_fasterrcnn_wrapper pointer to FD_C_FasterRCNNWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(FasterRCNN, fd_c_fasterrcnn_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_fasterrcnn_wrapper pointer to FD_C_FasterRCNNWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(FasterRCNN, fd_c_fasterrcnn_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_fasterrcnn_wrapper pointer to FD_C_FasterRCNNWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(FasterRCNN, fd_c_fasterrcnn_wrapper); + +// MaskRCNN + +typedef struct FD_C_MaskRCNNWrapper FD_C_MaskRCNNWrapper; + +/** \brief Create a new FD_C_MaskRCNNWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_MaskRCNNWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(MaskRCNN); + +/** \brief Destroy a FD_C_MaskRCNNWrapper object + * + * \param[in] fd_c_maskrcnn_wrapper pointer to FD_C_MaskRCNNWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(MaskRCNN, fd_c_maskrcnn_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_maskrcnn_wrapper pointer to FD_C_MaskRCNNWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(MaskRCNN, fd_c_maskrcnn_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_maskrcnn_wrapper pointer to FD_C_MaskRCNNWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(MaskRCNN, fd_c_maskrcnn_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_maskrcnn_wrapper pointer to FD_C_MaskRCNNWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(MaskRCNN, fd_c_maskrcnn_wrapper); + +// SSD + +typedef struct FD_C_SSDWrapper FD_C_SSDWrapper; + +/** \brief Create a new FD_C_SSDWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_SSDWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(SSD); + +/** \brief Destroy a FD_C_SSDWrapper object + * + * \param[in] fd_c_ssd_wrapper pointer to FD_C_SSDWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(SSD, fd_c_ssd_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_ssd_wrapper pointer to FD_C_SSDWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(SSD, fd_c_ssd_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_ssd_wrapper pointer to FD_C_SSDWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(SSD, fd_c_ssd_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_ssd_wrapper pointer to FD_C_SSDWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(SSD, fd_c_ssd_wrapper); + +// PaddleYOLOv5 + +typedef struct FD_C_PaddleYOLOv5Wrapper FD_C_PaddleYOLOv5Wrapper; + +/** \brief Create a new FD_C_PaddleYOLOv5Wrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_PaddleYOLOv5Wrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(PaddleYOLOv5); + +/** \brief Destroy a FD_C_PaddleYOLOv5Wrapper object + * + * \param[in] fd_c_paddleyolov5_wrapper pointer to FD_C_PaddleYOLOv5Wrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(PaddleYOLOv5, fd_c_paddleyolov5_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_paddleyolov5_wrapper pointer to FD_C_PaddleYOLOv5Wrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(PaddleYOLOv5, fd_c_paddleyolov5_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_paddleyolov5_wrapper pointer to FD_C_PaddleYOLOv5Wrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(PaddleYOLOv5, fd_c_paddleyolov5_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_paddleyolov5_wrapper pointer to FD_C_PaddleYOLOv5Wrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(PaddleYOLOv5, fd_c_paddleyolov5_wrapper); + +// PaddleYOLOv6 + +typedef struct FD_C_PaddleYOLOv6Wrapper FD_C_PaddleYOLOv6Wrapper; + +/** \brief Create a new FD_C_PaddleYOLOv6Wrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_PaddleYOLOv6Wrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(PaddleYOLOv6); + +/** \brief Destroy a FD_C_PaddleYOLOv6Wrapper object + * + * \param[in] fd_c_paddleyolov6_wrapper pointer to FD_C_PaddleYOLOv6Wrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(PaddleYOLOv6, fd_c_paddleyolov6_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_paddleyolov6_wrapper pointer to FD_C_PaddleYOLOv6Wrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(PaddleYOLOv6, fd_c_paddleyolov6_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_paddleyolov6_wrapper pointer to FD_C_PaddleYOLOv6Wrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(PaddleYOLOv6, fd_c_paddleyolov6_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_paddleyolov6_wrapper pointer to FD_C_PaddleYOLOv6Wrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(PaddleYOLOv6, fd_c_paddleyolov6_wrapper); + +// PaddleYOLOv7 + +typedef struct FD_C_PaddleYOLOv7Wrapper FD_C_PaddleYOLOv7Wrapper; + +/** \brief Create a new FD_C_PaddleYOLOv7Wrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_PaddleYOLOv7Wrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(PaddleYOLOv7); + +/** \brief Destroy a FD_C_PaddleYOLOv7Wrapper object + * + * \param[in] fd_c_paddleyolov7_wrapper pointer to FD_C_PaddleYOLOv7Wrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(PaddleYOLOv7, fd_c_paddleyolov7_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_paddleyolov7_wrapper pointer to FD_C_PaddleYOLOv7Wrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(PaddleYOLOv7, fd_c_paddleyolov7_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_paddleyolov7_wrapper pointer to FD_C_PaddleYOLOv7Wrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(PaddleYOLOv7, fd_c_paddleyolov7_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_paddleyolov7_wrapper pointer to FD_C_PaddleYOLOv7Wrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(PaddleYOLOv7, fd_c_paddleyolov7_wrapper); + +// PaddleYOLOv8 + +typedef struct FD_C_PaddleYOLOv8Wrapper FD_C_PaddleYOLOv8Wrapper; + +/** \brief Create a new FD_C_PaddleYOLOv8Wrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_PaddleYOLOv8Wrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(PaddleYOLOv8); + +/** \brief Destroy a FD_C_PaddleYOLOv8Wrapper object + * + * \param[in] fd_c_paddleyolov8_wrapper pointer to FD_C_PaddleYOLOv8Wrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(PaddleYOLOv8, fd_c_paddleyolov8_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_paddleyolov8_wrapper pointer to FD_C_PaddleYOLOv8Wrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(PaddleYOLOv8, fd_c_paddleyolov8_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_paddleyolov8_wrapper pointer to FD_C_PaddleYOLOv8Wrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(PaddleYOLOv8, fd_c_paddleyolov8_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_paddleyolov8_wrapper pointer to FD_C_PaddleYOLOv8Wrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(PaddleYOLOv8, fd_c_paddleyolov8_wrapper); + +// RTMDet + +typedef struct FD_C_RTMDetWrapper FD_C_RTMDetWrapper; + +/** \brief Create a new FD_C_RTMDetWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_RTMDetWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(RTMDet); + +/** \brief Destroy a FD_C_RTMDetWrapper object + * + * \param[in] fd_c_rtmdet_wrapper pointer to FD_C_RTMDetWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(RTMDet, fd_c_rtmdet_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_rtmdet_wrapper pointer to FD_C_RTMDetWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(RTMDet, fd_c_rtmdet_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_rtmdet_wrapper pointer to FD_C_RTMDetWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(RTMDet, fd_c_rtmdet_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_rtmdet_wrapper pointer to FD_C_RTMDetWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(RTMDet, fd_c_rtmdet_wrapper); + +// CascadeRCNN + +typedef struct FD_C_CascadeRCNNWrapper FD_C_CascadeRCNNWrapper; + +/** \brief Create a new FD_C_CascadeRCNNWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_CascadeRCNNWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(CascadeRCNN); + +/** \brief Destroy a FD_C_CascadeRCNNWrapper object + * + * \param[in] fd_c_cascadercnn_wrapper pointer to FD_C_CascadeRCNNWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(CascadeRCNN, fd_c_cascadercnn_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_cascadercnn_wrapper pointer to FD_C_CascadeRCNNWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
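+ *
+ * Before running prediction it is worth confirming that construction succeeded.
+ * A brief sketch (illustrative only; the function name is assumed to be what
+ * DECLARE_INITIALIZED_FUNCTION expands to for this model):
+ * \code
+ * if (!FD_C_CascadeRCNNWrapperInitialized(model)) {
+ *   // initialization failed: check the model, params and config paths
+ * }
+ * \endcode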
+ */ + +DECLARE_PREDICT_FUNCTION(CascadeRCNN, fd_c_cascadercnn_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_cascadercnn_wrapper pointer to FD_C_CascadeRCNNWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(CascadeRCNN, fd_c_cascadercnn_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_cascadercnn_wrapper pointer to FD_C_CascadeRCNNWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(CascadeRCNN, fd_c_cascadercnn_wrapper); + +// PSSDet + +typedef struct FD_C_PSSDetWrapper FD_C_PSSDetWrapper; + +/** \brief Create a new FD_C_PSSDetWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_PSSDetWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(PSSDet); + +/** \brief Destroy a FD_C_PSSDetWrapper object + * + * \param[in] fd_c_pssdet_wrapper pointer to FD_C_PSSDetWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(PSSDet, fd_c_pssdet_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_pssdet_wrapper pointer to FD_C_PSSDetWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(PSSDet, fd_c_pssdet_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_pssdet_wrapper pointer to FD_C_PSSDetWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(PSSDet, fd_c_pssdet_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_pssdet_wrapper pointer to FD_C_PSSDetWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(PSSDet, fd_c_pssdet_wrapper); + +// RetinaNet + +typedef struct FD_C_RetinaNetWrapper FD_C_RetinaNetWrapper; + +/** \brief Create a new FD_C_RetinaNetWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_RetinaNetWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(RetinaNet); + +/** \brief Destroy a FD_C_RetinaNetWrapper object + * + * \param[in] fd_c_retinanet_wrapper pointer to FD_C_RetinaNetWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(RetinaNet, fd_c_retinanet_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_retinanet_wrapper pointer to FD_C_RetinaNetWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(RetinaNet, fd_c_retinanet_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_retinanet_wrapper pointer to FD_C_RetinaNetWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(RetinaNet, fd_c_retinanet_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_retinanet_wrapper pointer to FD_C_RetinaNetWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(RetinaNet, fd_c_retinanet_wrapper); + +// FCOS + +typedef struct FD_C_FCOSWrapper FD_C_FCOSWrapper; + +/** \brief Create a new FD_C_FCOSWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_FCOSWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(FCOS); + +/** \brief Destroy a FD_C_FCOSWrapper object + * + * \param[in] fd_c_fcos_wrapper pointer to FD_C_FCOSWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(FCOS, fd_c_fcos_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_fcos_wrapper pointer to FD_C_FCOSWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(FCOS, fd_c_fcos_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_fcos_wrapper pointer to FD_C_FCOSWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(FCOS, fd_c_fcos_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_fcos_wrapper pointer to FD_C_FCOSWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(FCOS, fd_c_fcos_wrapper); + +// TTFNet + +typedef struct FD_C_TTFNetWrapper FD_C_TTFNetWrapper; + +/** \brief Create a new FD_C_TTFNetWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_TTFNetWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(TTFNet); + +/** \brief Destroy a FD_C_TTFNetWrapper object + * + * \param[in] fd_c_ttfnet_wrapper pointer to FD_C_TTFNetWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(TTFNet, fd_c_ttfnet_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_ttfnet_wrapper pointer to FD_C_TTFNetWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(TTFNet, fd_c_ttfnet_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_ttfnet_wrapper pointer to FD_C_TTFNetWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(TTFNet, fd_c_ttfnet_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_ttfnet_wrapper pointer to FD_C_TTFNetWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(TTFNet, fd_c_ttfnet_wrapper); + +// TOOD + +typedef struct FD_C_TOODWrapper FD_C_TOODWrapper; + +/** \brief Create a new FD_C_TOODWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_TOODWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(TOOD); + +/** \brief Destroy a FD_C_TOODWrapper object + * + * \param[in] fd_c_tood_wrapper pointer to FD_C_TOODWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(TOOD, fd_c_tood_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_tood_wrapper pointer to FD_C_TOODWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
+ */ + +DECLARE_PREDICT_FUNCTION(TOOD, fd_c_tood_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_tood_wrapper pointer to FD_C_TOODWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(TOOD, fd_c_tood_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_tood_wrapper pointer to FD_C_TOODWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(TOOD, fd_c_tood_wrapper); + +// GFL + +typedef struct FD_C_GFLWrapper FD_C_GFLWrapper; + +/** \brief Create a new FD_C_GFLWrapper object + * + * \param[in] model_file Path of model file, e.g resnet/model.pdmodel + * \param[in] params_file Path of parameter file, e.g resnet/model.pdiparams, if the model format is ONNX, this parameter will be ignored + * \param[in] config_file Path of configuration file for deployment, e.g resnet/infer_cfg.yml + * \param[in] fd_c_runtime_option_wrapper RuntimeOption for inference, the default will use cpu, and choose the backend defined in `valid_cpu_backends` + * \param[in] model_format Model format of the loaded model, default is Paddle format + * + * \return Return a pointer to FD_C_GFLWrapper object + */ + +DECLARE_CREATE_WRAPPER_FUNCTION(GFL); + +/** \brief Destroy a FD_C_GFLWrapper object + * + * \param[in] fd_c_gfl_wrapper pointer to FD_C_GFLWrapper object + */ + +DECLARE_DESTROY_WRAPPER_FUNCTION(GFL, fd_c_gfl_wrapper); + +/** \brief Predict the detection result for an input image + * + * \param[in] fd_c_gfl_wrapper pointer to FD_C_GFLWrapper object + * \param[in] img pointer to cv::Mat image + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object, which stores the result. 
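+ *
+ * Once a prediction has been obtained, the result helpers added elsewhere in this
+ * change can draw or print it. A brief sketch (illustrative only; `im` is an
+ * FD_C_Mat, `result` a filled FD_C_DetectionResult, and freeing the returned
+ * image, wrapper and string is omitted):
+ * \code
+ * FD_C_Mat vis = FD_C_VisDetection(im, &result, 0.5f, 2, 0.5f);
+ * FD_C_DetectionResultWrapper* wrapper =
+ *     FD_C_CreateDetectionResultWrapperFromData(&result);
+ * printf("%s\n", FD_C_DetectionResultWrapperStr(wrapper));
+ * \endcode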
+ */ + +DECLARE_PREDICT_FUNCTION(GFL, fd_c_gfl_wrapper); + +/** \brief Check if the model is initialized successfully + * + * \param[in] fd_c_gfl_wrapper pointer to FD_C_GFLWrapper object + * + * \return Return a bool of value true if initialized successfully + */ + +DECLARE_INITIALIZED_FUNCTION(GFL, fd_c_gfl_wrapper); + +/** \brief Predict the detection results for a batch of input images + * + * \param[in] fd_c_gfl_wrapper pointer to FD_C_GFLWrapper object + * \param[in] imgs, The input image list, each element comes from cv::imread() + * \param[in] results The output detection result list + * + * \return true if the prediction successed, otherwise false + */ + +DECLARE_BATCH_PREDICT_FUNCTION(GFL, fd_c_gfl_wrapper); + + + + + + #ifdef __cplusplus } // extern "C" diff --git a/c_api/fastdeploy_capi/vision/result.cc b/c_api/fastdeploy_capi/vision/result.cc index 207ef3950..175c6b29c 100644 --- a/c_api/fastdeploy_capi/vision/result.cc +++ b/c_api/fastdeploy_capi/vision/result.cc @@ -90,6 +90,16 @@ FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapperFromData( return fd_c_classify_result_wrapper; } +char* FD_C_ClassifyResultWrapperStr( + FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper) { + auto& classify_result = CHECK_AND_CONVERT_FD_TYPE( + ClassifyResultWrapper, fd_c_classify_result_wrapper); + std::string information = classify_result->Str(); + char* cstr = new char[information.length() + 1]; + std::strcpy(cstr, information.c_str()); + return cstr; +} + // Detection Results FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapper() { @@ -237,6 +247,17 @@ FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapperFromData( return fd_c_detection_result_wrapper; } + +char* FD_C_DetectionResultWrapperStr( + FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) { + auto& detection_result = CHECK_AND_CONVERT_FD_TYPE( + DetectionResultWrapper, fd_c_detection_result_wrapper); + std::string information = detection_result->Str(); + char* cstr = new char[information.length() + 1]; + std::strcpy(cstr, information.c_str()); + return cstr; +} + #ifdef __cplusplus } -#endif \ No newline at end of file +#endif diff --git a/c_api/fastdeploy_capi/vision/result.h b/c_api/fastdeploy_capi/vision/result.h index 247ec50f6..40a846af4 100644 --- a/c_api/fastdeploy_capi/vision/result.h +++ b/c_api/fastdeploy_capi/vision/result.h @@ -30,6 +30,11 @@ typedef struct FD_C_ClassifyResult { FD_C_ResultType type; } FD_C_ClassifyResult; +typedef struct FD_C_OneDimClassifyResult { + size_t size; + FD_C_ClassifyResult* data; +} FD_C_OneDimClassifyResult; + typedef struct FD_C_Mask { FD_C_OneDimArrayUint8 data; FD_C_OneDimArrayInt64 shape; @@ -50,6 +55,11 @@ typedef struct FD_C_DetectionResult { FD_C_ResultType type; } FD_C_DetectionResult; +typedef struct FD_C_OneDimDetectionResult { + size_t size; + FD_C_DetectionResult* data; +} FD_C_OneDimDetectionResult; + // Classification Results /** \brief Create a new FD_C_ClassifyResultWrapper object @@ -95,6 +105,16 @@ FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_ClassifyResultWrapper* FD_C_CreateClassifyResultWrapperFromData( __fd_keep FD_C_ClassifyResult* fd_c_classify_result); +/** \brief Print ClassifyResult formated information + * + * \param[in] fd_c_classify_result_wrapper pointer to FD_C_ClassifyResultWrapper object + * \return Return a string pointer + */ + +FASTDEPLOY_CAPI_EXPORT extern __fd_give char* +FD_C_ClassifyResultWrapperStr( + __fd_keep FD_C_ClassifyResultWrapper* fd_c_classify_result_wrapper); + // Detection Results /** \brief 
Create a new FD_C_DetectionResultWrapper object @@ -140,6 +160,16 @@ FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapperFromData( __fd_keep FD_C_DetectionResult* fd_c_detection_result); +/** \brief Print DetectionResult formated information + * + * \param[in] fd_c_detection_result_wrapper pointer to FD_C_DetectionResultWrapper object + * \return Return a string pointer + */ + +FASTDEPLOY_CAPI_EXPORT extern __fd_give char* +FD_C_DetectionResultWrapperStr( + __fd_keep FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper); + #ifdef __cplusplus } // extern "C" #endif diff --git a/c_api/fastdeploy_capi/vision/visualize.cc b/c_api/fastdeploy_capi/vision/visualize.cc index 6045270bd..ffa0b0939 100644 --- a/c_api/fastdeploy_capi/vision/visualize.cc +++ b/c_api/fastdeploy_capi/vision/visualize.cc @@ -29,11 +29,31 @@ FD_C_Mat FD_C_VisDetection(FD_C_Mat im, FD_C_CreateDetectionResultWrapperFromData(fd_c_detection_result); auto& detection_result = CHECK_AND_CONVERT_FD_TYPE( DetectionResultWrapper, fd_c_detection_result_wrapper); - cv::Mat result = fastdeploy::vision::Visualize::VisDetection( + cv::Mat result = fastdeploy::vision::VisDetection( *(reinterpret_cast(im)), *detection_result, score_threshold, line_size, font_size); return new cv::Mat(result); } + +FD_C_Mat FD_C_VisDetectionWithLabel(FD_C_Mat im, + FD_C_DetectionResult* fd_c_detection_result, + FD_C_OneDimArrayCstr* labels, + float score_threshold, int line_size, + float font_size) { + std::vector labels_in; + for (int i = 0; i < labels->size; i++) { + labels_in.emplace_back(labels->data[i].data); + } + FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper = + FD_C_CreateDetectionResultWrapperFromData(fd_c_detection_result); + auto& detection_result = CHECK_AND_CONVERT_FD_TYPE( + DetectionResultWrapper, fd_c_detection_result_wrapper); + cv::Mat result = fastdeploy::vision::VisDetection( + *(reinterpret_cast(im)), *detection_result, labels_in, + score_threshold, line_size, font_size); + return new cv::Mat(result); +} + #ifdef __cplusplus } -#endif \ No newline at end of file +#endif diff --git a/c_api/fastdeploy_capi/vision/visualize.h b/c_api/fastdeploy_capi/vision/visualize.h index 43d406dab..ca5bfcab3 100644 --- a/c_api/fastdeploy_capi/vision/visualize.h +++ b/c_api/fastdeploy_capi/vision/visualize.h @@ -22,15 +22,37 @@ extern "C" { #endif -/** \brief Visualize Detection +/** \brief Show the visualized results for detection models * - * \return Return a pointer to cv::Mat object + * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format + * \param[in] result the result produced by model + * \param[in] score_threshold threshold for result scores, the bounding box will not be shown if the score is less than score_threshold + * \param[in] line_size line size for bounding boxes + * \param[in] font_size font size for text + * \return cv::Mat type stores the visualized results */ FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_Mat FD_C_VisDetection(FD_C_Mat im, FD_C_DetectionResult* fd_detection_result, float score_threshold, int line_size, float font_size); +/** \brief Show the visualized results with custom labels for detection models + * + * \param[in] im the input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format + * \param[in] result the result produced by model + * \param[in] labels the visualized result will show the bounding box contain class label + * \param[in] score_threshold threshold for result 
scores, the bounding box will not be shown if the score is less than score_threshold + * \param[in] line_size line size for bounding boxes + * \param[in] font_size font size for text + * \return cv::Mat type stores the visualized results + */ +FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_Mat FD_C_VisDetectionWithLabel( + FD_C_Mat im, + FD_C_DetectionResult* fd_detection_result, + FD_C_OneDimArrayCstr* labels, + float score_threshold, + int line_size, float font_size); + #ifdef __cplusplus } // extern "C" #endif From ea548ab3db731192d1b86a0cbe3d0d901f45d936 Mon Sep 17 00:00:00 2001 From: chenjian Date: Fri, 17 Feb 2023 11:03:52 +0800 Subject: [PATCH 39/41] [C#] Supplement model and result api for c sharp (#1322) * add c sharp api for fastdeploy * update accroding to c apis * add cmakelist for c sharp api * add cmakelists for c sharp * fix cmakelists * fix cmakelists * add c sharp api for fastdeploy * add ppyoloe demo * add ppyoloe demo * modify demo namespace code * add readme * fix format * format code * fix doc * add model api * add batch_predict and string result for c sharp * add ppdet models * update api * fix --- csharp/fastdeploy/types_internal_c.cs | 20 + .../vision/classification/ppcls/model.cs | 69 +- .../vision/detection/ppdet/model.cs | 2041 ++++++++++++++++- csharp/fastdeploy/vision/result.cs | 93 +- csharp/fastdeploy/vision/visualize.cs | 24 + 5 files changed, 2216 insertions(+), 31 deletions(-) diff --git a/csharp/fastdeploy/types_internal_c.cs b/csharp/fastdeploy/types_internal_c.cs index d1274e28b..4402afc70 100644 --- a/csharp/fastdeploy/types_internal_c.cs +++ b/csharp/fastdeploy/types_internal_c.cs @@ -97,6 +97,13 @@ public struct FD_ClassifyResult { public FD_ResultType type; } +[StructLayout(LayoutKind.Sequential)] +public struct FD_OneDimClassifyResult { + public nuint size; + public IntPtr data; // FD_ClassifyResult[] +} + + [StructLayout(LayoutKind.Sequential)] public struct FD_Mask { public FD_OneDimArrayUint8 data; @@ -121,5 +128,18 @@ public struct FD_DetectionResult { public FD_ResultType type; } + +[StructLayout(LayoutKind.Sequential)] +public struct FD_OneDimDetectionResult { + public nuint size; + public IntPtr data; // FD_DetectionResult[] +} + +[StructLayout(LayoutKind.Sequential)] +public struct FD_OneDimMat { + public nuint size; + public IntPtr data; // Mat[] +} + } } diff --git a/csharp/fastdeploy/vision/classification/ppcls/model.cs b/csharp/fastdeploy/vision/classification/ppcls/model.cs index 4217c77a6..e130d8ecf 100644 --- a/csharp/fastdeploy/vision/classification/ppcls/model.cs +++ b/csharp/fastdeploy/vision/classification/ppcls/model.cs @@ -23,7 +23,7 @@ namespace fastdeploy { namespace vision { namespace classification { -class PaddleClasModel { +public class PaddleClasModel { public PaddleClasModel(string model_file, string params_file, string config_file, RuntimeOption custom_option = null, @@ -40,25 +40,54 @@ class PaddleClasModel { FD_C_DestroyPaddleClasModelWrapper(fd_paddleclas_model_wrapper); } + + public string ModelName() { + return "PaddleClas/Model"; + } + public ClassifyResult Predict(Mat img) { - IntPtr fd_classify_result_wrapper_ptr = FD_C_CreateClassifyResultWrapper(); - FD_C_PaddleClasModelWrapperPredict( + FD_ClassifyResult fd_classify_result = new FD_ClassifyResult(); + if(! 
FD_C_PaddleClasModelWrapperPredict( fd_paddleclas_model_wrapper, img.CvPtr, - fd_classify_result_wrapper_ptr); // predict - IntPtr fd_classify_result_ptr = FD_C_ClassifyResultWrapperGetData( - fd_classify_result_wrapper_ptr); // get result from wrapper - FD_ClassifyResult fd_classify_result = - (FD_ClassifyResult)Marshal.PtrToStructure(fd_classify_result_ptr, - typeof(FD_ClassifyResult)); + ref fd_classify_result)) + { + return null; + } // predict ClassifyResult classify_result = ConvertResult.ConvertCResultToClassifyResult(fd_classify_result); - FD_C_DestroyClassifyResultWrapper( - fd_classify_result_wrapper_ptr); // free fd_classify_result_wrapper_ptr - FD_C_DestroyClassifyResult( - fd_classify_result_ptr); // free fd_classify_result_ptr return classify_result; } + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimClassifyResult fd_classify_result_array = new FD_OneDimClassifyResult(); + if (!FD_C_PaddleClasModelWrapperBatchPredict(fd_paddleclas_model_wrapper, ref imgs_in, ref fd_classify_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_ClassifyResult fd_classify_result = (FD_ClassifyResult)Marshal.PtrToStructure( + fd_classify_result_array.data + i * Marshal.SizeOf(new FD_ClassifyResult()), + typeof(FD_ClassifyResult)); + results_out.Add(ConvertResult.ConvertCResultToClassifyResult(fd_classify_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_PaddleClasModelWrapperInitialized(fd_paddleclas_model_wrapper); + } + // below are underlying C api private IntPtr fd_paddleclas_model_wrapper; [DllImport("fastdeploy.dll", @@ -75,7 +104,7 @@ class PaddleClasModel { private static extern bool FD_C_PaddleClasModelWrapperPredict(IntPtr fd_paddleclas_model_wrapper, IntPtr img, - IntPtr fd_classify_result_wrapper); + ref FD_ClassifyResult fd_classify_result); [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreateClassifyResultWrapper")] private static extern IntPtr FD_C_CreateClassifyResultWrapper(); [DllImport("fastdeploy.dll", @@ -93,6 +122,18 @@ class PaddleClasModel { EntryPoint = "FD_C_CreateClassifyResultWrapperFromData")] private static extern IntPtr FD_C_CreateClassifyResultWrapperFromData(IntPtr fd_classify_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleClasModelWrapperInitialized")] + private static extern bool + FD_C_PaddleClasModelWrapperInitialized(IntPtr fd_paddleclas_model_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleClasModelWrapperBatchPredict")] + private static extern bool + FD_C_PaddleClasModelWrapperBatchPredict(IntPtr fd_paddleclas_model_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimClassifyResult results); + } } diff --git a/csharp/fastdeploy/vision/detection/ppdet/model.cs b/csharp/fastdeploy/vision/detection/ppdet/model.cs index e675746ad..a6f5c0953 100644 --- a/csharp/fastdeploy/vision/detection/ppdet/model.cs +++ b/csharp/fastdeploy/vision/detection/ppdet/model.cs @@ -23,6 +23,7 @@ namespace fastdeploy { namespace vision { namespace detection { +// PPYOLOE public class PPYOLOE { public PPYOLOE(string model_file, string 
params_file, string config_file, @@ -39,24 +40,48 @@ public class PPYOLOE { ~PPYOLOE() { FD_C_DestroyPPYOLOEWrapper(fd_ppyoloe_wrapper); } public DetectionResult Predict(Mat img) { - IntPtr fd_detection_result_wrapper_ptr = - FD_C_CreateDetectionResultWrapper(); - FD_C_PPYOLOEWrapperPredict(fd_ppyoloe_wrapper, img.CvPtr, - fd_detection_result_wrapper_ptr); // predict - IntPtr fd_detection_result_ptr = FD_C_DetectionResultWrapperGetData( - fd_detection_result_wrapper_ptr); // get result from wrapper - FD_DetectionResult fd_detection_result = - (FD_DetectionResult)Marshal.PtrToStructure(fd_detection_result_ptr, - typeof(FD_DetectionResult)); + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_PPYOLOEWrapperPredict(fd_ppyoloe_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + DetectionResult detection_result = ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); - FD_C_DestroyDetectionResultWrapper( - fd_detection_result_wrapper_ptr); // free fd_detection_result_wrapper_ptr - FD_C_DestroyDetectionResult( - fd_detection_result_ptr); // free fd_detection_result_ptr return detection_result; } + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_PPYOLOEWrapperBatchPredict(fd_ppyoloe_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_PPYOLOEWrapperInitialized(fd_ppyoloe_wrapper); + } + // below are underlying C api private IntPtr fd_ppyoloe_wrapper; [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesPPYOLOEWrapper")] @@ -69,7 +94,7 @@ public class PPYOLOE { [DllImport("fastdeploy.dll", EntryPoint = "FD_C_PPYOLOEWrapperPredict")] private static extern bool FD_C_PPYOLOEWrapperPredict(IntPtr fd_ppyoloe_wrapper, IntPtr img, - IntPtr fd_detection_result_wrapper); + ref FD_DetectionResult fd_detection_result); [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreateDetectionResultWrapper")] private static extern IntPtr FD_C_CreateDetectionResultWrapper(); @@ -88,7 +113,1995 @@ public class PPYOLOE { EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] private static extern IntPtr FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PPYOLOEWrapperInitialized")] + private static extern bool + FD_C_PPYOLOEWrapperInitialized(IntPtr fd_c_ppyoloe_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PPYOLOEWrapperBatchPredict")] + private static extern bool + FD_C_PPYOLOEWrapperBatchPredict(IntPtr fd_c_ppyoloe_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); } + +// PicoDet +public class 
PicoDet { + + public PicoDet(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_picodet_wrapper = + FD_C_CreatesPicoDetWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~PicoDet() { FD_C_DestroyPicoDetWrapper(fd_picodet_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_PicoDetWrapperPredict(fd_picodet_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_PicoDetWrapperBatchPredict(fd_picodet_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_PicoDetWrapperInitialized(fd_picodet_wrapper); + } + + // below are underlying C api + private IntPtr fd_picodet_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesPicoDetWrapper")] + private static extern IntPtr FD_C_CreatesPicoDetWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyPicoDetWrapper")] + private static extern void + FD_C_DestroyPicoDetWrapper(IntPtr fd_picodet_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_PicoDetWrapperPredict")] + private static extern bool + FD_C_PicoDetWrapperPredict(IntPtr fd_picodet_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + 
FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PicoDetWrapperInitialized")] + private static extern bool + FD_C_PicoDetWrapperInitialized(IntPtr fd_c_picodet_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PicoDetWrapperBatchPredict")] + private static extern bool + FD_C_PicoDetWrapperBatchPredict(IntPtr fd_c_picodet_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + + +// PPYOLO + +public class PPYOLO { + + public PPYOLO(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_ppyolo_wrapper = + FD_C_CreatesPPYOLOWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~PPYOLO() { FD_C_DestroyPPYOLOWrapper(fd_ppyolo_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_PPYOLOWrapperPredict(fd_ppyolo_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_PPYOLOWrapperBatchPredict(fd_ppyolo_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_PPYOLOWrapperInitialized(fd_ppyolo_wrapper); + } + + // below are underlying C api + private IntPtr fd_ppyolo_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesPPYOLOWrapper")] + private static extern IntPtr FD_C_CreatesPPYOLOWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyPPYOLOWrapper")] + private static extern void + FD_C_DestroyPPYOLOWrapper(IntPtr fd_ppyolo_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_PPYOLOWrapperPredict")] + private static extern bool + FD_C_PPYOLOWrapperPredict(IntPtr fd_ppyolo_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + 
[DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PPYOLOWrapperInitialized")] + private static extern bool + FD_C_PPYOLOWrapperInitialized(IntPtr fd_c_ppyolo_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PPYOLOWrapperBatchPredict")] + private static extern bool + FD_C_PPYOLOWrapperBatchPredict(IntPtr fd_c_ppyolo_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// YOLOv3 + +public class YOLOv3 { + + public YOLOv3(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_yolov3_wrapper = + FD_C_CreatesYOLOv3Wrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~YOLOv3() { FD_C_DestroyYOLOv3Wrapper(fd_yolov3_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_YOLOv3WrapperPredict(fd_yolov3_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_YOLOv3WrapperBatchPredict(fd_yolov3_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_YOLOv3WrapperInitialized(fd_yolov3_wrapper); + } + + // below are underlying C api + private IntPtr fd_yolov3_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesYOLOv3Wrapper")] + private static extern IntPtr FD_C_CreatesYOLOv3Wrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyYOLOv3Wrapper")] + private static extern void + FD_C_DestroyYOLOv3Wrapper(IntPtr fd_yolov3_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_YOLOv3WrapperPredict")] + private static extern bool + 
FD_C_YOLOv3WrapperPredict(IntPtr fd_yolov3_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_YOLOv3WrapperInitialized")] + private static extern bool + FD_C_YOLOv3WrapperInitialized(IntPtr fd_c_yolov3_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_YOLOv3WrapperBatchPredict")] + private static extern bool + FD_C_YOLOv3WrapperBatchPredict(IntPtr fd_c_yolov3_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// PaddleYOLOX + +public class PaddleYOLOX { + + public PaddleYOLOX(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_paddleyolox_wrapper = + FD_C_CreatesPaddleYOLOXWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~PaddleYOLOX() { FD_C_DestroyPaddleYOLOXWrapper(fd_paddleyolox_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! 
FD_C_PaddleYOLOXWrapperPredict(fd_paddleyolox_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_PaddleYOLOXWrapperBatchPredict(fd_paddleyolox_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_PaddleYOLOXWrapperInitialized(fd_paddleyolox_wrapper); + } + + // below are underlying C api + private IntPtr fd_paddleyolox_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesPaddleYOLOXWrapper")] + private static extern IntPtr FD_C_CreatesPaddleYOLOXWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyPaddleYOLOXWrapper")] + private static extern void + FD_C_DestroyPaddleYOLOXWrapper(IntPtr fd_paddleyolox_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_PaddleYOLOXWrapperPredict")] + private static extern bool + FD_C_PaddleYOLOXWrapperPredict(IntPtr fd_paddleyolox_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleYOLOXWrapperInitialized")] + private static extern bool + FD_C_PaddleYOLOXWrapperInitialized(IntPtr fd_c_paddleyolox_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleYOLOXWrapperBatchPredict")] + private static extern bool + FD_C_PaddleYOLOXWrapperBatchPredict(IntPtr fd_c_paddleyolox_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// FasterRCNN + +public class FasterRCNN { 
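+
+  // How these detection wrappers are typically driven from C# (an illustrative
+  // sketch only: the file paths are placeholders and image loading assumes
+  // OpenCvSharp's Cv2.ImRead, which is not part of this patch):
+  //
+  //   var model = new FasterRCNN("model.pdmodel", "model.pdiparams", "infer_cfg.yml");
+  //   if (model.Initialized()) {
+  //     DetectionResult single = model.Predict(Cv2.ImRead("test.jpg"));
+  //     List<DetectionResult> batch =
+  //         model.BatchPredict(new List<Mat> { Cv2.ImRead("test.jpg") });
+  //   }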
+ + public FasterRCNN(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_fasterrcnn_wrapper = + FD_C_CreatesFasterRCNNWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~FasterRCNN() { FD_C_DestroyFasterRCNNWrapper(fd_fasterrcnn_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_FasterRCNNWrapperPredict(fd_fasterrcnn_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_FasterRCNNWrapperBatchPredict(fd_fasterrcnn_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_FasterRCNNWrapperInitialized(fd_fasterrcnn_wrapper); + } + + // below are underlying C api + private IntPtr fd_fasterrcnn_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesFasterRCNNWrapper")] + private static extern IntPtr FD_C_CreatesFasterRCNNWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyFasterRCNNWrapper")] + private static extern void + FD_C_DestroyFasterRCNNWrapper(IntPtr fd_fasterrcnn_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_FasterRCNNWrapperPredict")] + private static extern bool + FD_C_FasterRCNNWrapperPredict(IntPtr fd_fasterrcnn_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static 
extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_FasterRCNNWrapperInitialized")] + private static extern bool + FD_C_FasterRCNNWrapperInitialized(IntPtr fd_c_fasterrcnn_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_FasterRCNNWrapperBatchPredict")] + private static extern bool + FD_C_FasterRCNNWrapperBatchPredict(IntPtr fd_c_fasterrcnn_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// MaskRCNN + +public class MaskRCNN { + + public MaskRCNN(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_maskrcnn_wrapper = + FD_C_CreatesMaskRCNNWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~MaskRCNN() { FD_C_DestroyMaskRCNNWrapper(fd_maskrcnn_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_MaskRCNNWrapperPredict(fd_maskrcnn_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_MaskRCNNWrapperBatchPredict(fd_maskrcnn_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_MaskRCNNWrapperInitialized(fd_maskrcnn_wrapper); + } + + // below are underlying C api + private IntPtr fd_maskrcnn_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesMaskRCNNWrapper")] + private static extern IntPtr FD_C_CreatesMaskRCNNWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyMaskRCNNWrapper")] + private static extern void + FD_C_DestroyMaskRCNNWrapper(IntPtr fd_maskrcnn_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_MaskRCNNWrapperPredict")] + private static extern bool + FD_C_MaskRCNNWrapperPredict(IntPtr fd_maskrcnn_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + 
FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_MaskRCNNWrapperInitialized")] + private static extern bool + FD_C_MaskRCNNWrapperInitialized(IntPtr fd_c_maskrcnn_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_MaskRCNNWrapperBatchPredict")] + private static extern bool + FD_C_MaskRCNNWrapperBatchPredict(IntPtr fd_c_maskrcnn_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// SSD + +public class SSD { + + public SSD(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_ssd_wrapper = + FD_C_CreatesSSDWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~SSD() { FD_C_DestroySSDWrapper(fd_ssd_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_SSDWrapperPredict(fd_ssd_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_SSDWrapperBatchPredict(fd_ssd_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_SSDWrapperInitialized(fd_ssd_wrapper); + } + + // below are underlying C api + private IntPtr fd_ssd_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesSSDWrapper")] + private static extern IntPtr FD_C_CreatesSSDWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroySSDWrapper")] + private static extern void + FD_C_DestroySSDWrapper(IntPtr fd_ssd_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_SSDWrapperPredict")] + 
private static extern bool + FD_C_SSDWrapperPredict(IntPtr fd_ssd_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_SSDWrapperInitialized")] + private static extern bool + FD_C_SSDWrapperInitialized(IntPtr fd_c_ssd_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_SSDWrapperBatchPredict")] + private static extern bool + FD_C_SSDWrapperBatchPredict(IntPtr fd_c_ssd_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// PaddleYOLOv5 + +public class PaddleYOLOv5 { + + public PaddleYOLOv5(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_paddleyolov5_wrapper = + FD_C_CreatesPaddleYOLOv5Wrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~PaddleYOLOv5() { FD_C_DestroyPaddleYOLOv5Wrapper(fd_paddleyolov5_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! 
FD_C_PaddleYOLOv5WrapperPredict(fd_paddleyolov5_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_PaddleYOLOv5WrapperBatchPredict(fd_paddleyolov5_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_PaddleYOLOv5WrapperInitialized(fd_paddleyolov5_wrapper); + } + + // below are underlying C api + private IntPtr fd_paddleyolov5_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesPaddleYOLOv5Wrapper")] + private static extern IntPtr FD_C_CreatesPaddleYOLOv5Wrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyPaddleYOLOv5Wrapper")] + private static extern void + FD_C_DestroyPaddleYOLOv5Wrapper(IntPtr fd_paddleyolov5_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_PaddleYOLOv5WrapperPredict")] + private static extern bool + FD_C_PaddleYOLOv5WrapperPredict(IntPtr fd_paddleyolov5_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleYOLOv5WrapperInitialized")] + private static extern bool + FD_C_PaddleYOLOv5WrapperInitialized(IntPtr fd_c_paddleyolov5_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleYOLOv5WrapperBatchPredict")] + private static extern bool + FD_C_PaddleYOLOv5WrapperBatchPredict(IntPtr fd_c_paddleyolov5_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// PaddleYOLOv6 + 
+public class PaddleYOLOv6 { + + public PaddleYOLOv6(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_paddleyolov6_wrapper = + FD_C_CreatesPaddleYOLOv6Wrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~PaddleYOLOv6() { FD_C_DestroyPaddleYOLOv6Wrapper(fd_paddleyolov6_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_PaddleYOLOv6WrapperPredict(fd_paddleyolov6_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_PaddleYOLOv6WrapperBatchPredict(fd_paddleyolov6_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_PaddleYOLOv6WrapperInitialized(fd_paddleyolov6_wrapper); + } + + // below are underlying C api + private IntPtr fd_paddleyolov6_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesPaddleYOLOv6Wrapper")] + private static extern IntPtr FD_C_CreatesPaddleYOLOv6Wrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyPaddleYOLOv6Wrapper")] + private static extern void + FD_C_DestroyPaddleYOLOv6Wrapper(IntPtr fd_paddleyolov6_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_PaddleYOLOv6WrapperPredict")] + private static extern bool + FD_C_PaddleYOLOv6WrapperPredict(IntPtr fd_paddleyolov6_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + 
EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleYOLOv6WrapperInitialized")] + private static extern bool + FD_C_PaddleYOLOv6WrapperInitialized(IntPtr fd_c_paddleyolov6_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleYOLOv6WrapperBatchPredict")] + private static extern bool + FD_C_PaddleYOLOv6WrapperBatchPredict(IntPtr fd_c_paddleyolov6_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// PaddleYOLOv7 + +public class PaddleYOLOv7 { + + public PaddleYOLOv7(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_paddleyolov7_wrapper = + FD_C_CreatesPaddleYOLOv7Wrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~PaddleYOLOv7() { FD_C_DestroyPaddleYOLOv7Wrapper(fd_paddleyolov7_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_PaddleYOLOv7WrapperPredict(fd_paddleyolov7_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_PaddleYOLOv7WrapperBatchPredict(fd_paddleyolov7_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_PaddleYOLOv7WrapperInitialized(fd_paddleyolov7_wrapper); + } + + // below are underlying C api + private IntPtr fd_paddleyolov7_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesPaddleYOLOv7Wrapper")] + private static extern IntPtr FD_C_CreatesPaddleYOLOv7Wrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyPaddleYOLOv7Wrapper")] + private static extern void + FD_C_DestroyPaddleYOLOv7Wrapper(IntPtr fd_paddleyolov7_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_PaddleYOLOv7WrapperPredict")] + private static extern bool + FD_C_PaddleYOLOv7WrapperPredict(IntPtr fd_paddleyolov7_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private 
static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleYOLOv7WrapperInitialized")] + private static extern bool + FD_C_PaddleYOLOv7WrapperInitialized(IntPtr fd_c_paddleyolov7_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleYOLOv7WrapperBatchPredict")] + private static extern bool + FD_C_PaddleYOLOv7WrapperBatchPredict(IntPtr fd_c_paddleyolov7_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// PaddleYOLOv8 + +public class PaddleYOLOv8 { + + public PaddleYOLOv8(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_paddleyolov8_wrapper = + FD_C_CreatesPaddleYOLOv8Wrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~PaddleYOLOv8() { FD_C_DestroyPaddleYOLOv8Wrapper(fd_paddleyolov8_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! 
FD_C_PaddleYOLOv8WrapperPredict(fd_paddleyolov8_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_PaddleYOLOv8WrapperBatchPredict(fd_paddleyolov8_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_PaddleYOLOv8WrapperInitialized(fd_paddleyolov8_wrapper); + } + + // below are underlying C api + private IntPtr fd_paddleyolov8_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesPaddleYOLOv8Wrapper")] + private static extern IntPtr FD_C_CreatesPaddleYOLOv8Wrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyPaddleYOLOv8Wrapper")] + private static extern void + FD_C_DestroyPaddleYOLOv8Wrapper(IntPtr fd_paddleyolov8_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_PaddleYOLOv8WrapperPredict")] + private static extern bool + FD_C_PaddleYOLOv8WrapperPredict(IntPtr fd_paddleyolov8_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleYOLOv8WrapperInitialized")] + private static extern bool + FD_C_PaddleYOLOv8WrapperInitialized(IntPtr fd_c_paddleyolov8_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PaddleYOLOv8WrapperBatchPredict")] + private static extern bool + FD_C_PaddleYOLOv8WrapperBatchPredict(IntPtr fd_c_paddleyolov8_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// RTMDet + +public 
class RTMDet { + + public RTMDet(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_rtmdet_wrapper = + FD_C_CreatesRTMDetWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~RTMDet() { FD_C_DestroyRTMDetWrapper(fd_rtmdet_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_RTMDetWrapperPredict(fd_rtmdet_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_RTMDetWrapperBatchPredict(fd_rtmdet_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_RTMDetWrapperInitialized(fd_rtmdet_wrapper); + } + + // below are underlying C api + private IntPtr fd_rtmdet_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesRTMDetWrapper")] + private static extern IntPtr FD_C_CreatesRTMDetWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyRTMDetWrapper")] + private static extern void + FD_C_DestroyRTMDetWrapper(IntPtr fd_rtmdet_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_RTMDetWrapperPredict")] + private static extern bool + FD_C_RTMDetWrapperPredict(IntPtr fd_rtmdet_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr 
fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RTMDetWrapperInitialized")] + private static extern bool + FD_C_RTMDetWrapperInitialized(IntPtr fd_c_rtmdet_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RTMDetWrapperBatchPredict")] + private static extern bool + FD_C_RTMDetWrapperBatchPredict(IntPtr fd_c_rtmdet_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// CascadeRCNN + +public class CascadeRCNN { + + public CascadeRCNN(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_cascadercnn_wrapper = + FD_C_CreatesCascadeRCNNWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~CascadeRCNN() { FD_C_DestroyCascadeRCNNWrapper(fd_cascadercnn_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_CascadeRCNNWrapperPredict(fd_cascadercnn_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_CascadeRCNNWrapperBatchPredict(fd_cascadercnn_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_CascadeRCNNWrapperInitialized(fd_cascadercnn_wrapper); + } + + // below are underlying C api + private IntPtr fd_cascadercnn_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesCascadeRCNNWrapper")] + private static extern IntPtr FD_C_CreatesCascadeRCNNWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyCascadeRCNNWrapper")] + private static extern void + FD_C_DestroyCascadeRCNNWrapper(IntPtr fd_cascadercnn_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CascadeRCNNWrapperPredict")] + private static extern bool + FD_C_CascadeRCNNWrapperPredict(IntPtr fd_cascadercnn_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + 
FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CascadeRCNNWrapperInitialized")] + private static extern bool + FD_C_CascadeRCNNWrapperInitialized(IntPtr fd_c_cascadercnn_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CascadeRCNNWrapperBatchPredict")] + private static extern bool + FD_C_CascadeRCNNWrapperBatchPredict(IntPtr fd_c_cascadercnn_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// PSSDet + +public class PSSDet { + + public PSSDet(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_pssdet_wrapper = + FD_C_CreatesPSSDetWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~PSSDet() { FD_C_DestroyPSSDetWrapper(fd_pssdet_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_PSSDetWrapperPredict(fd_pssdet_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_PSSDetWrapperBatchPredict(fd_pssdet_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_PSSDetWrapperInitialized(fd_pssdet_wrapper); + } + + // below are underlying C api + private IntPtr fd_pssdet_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesPSSDetWrapper")] + private static extern IntPtr FD_C_CreatesPSSDetWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyPSSDetWrapper")] + private static extern void + FD_C_DestroyPSSDetWrapper(IntPtr fd_pssdet_wrapper); 
+ [DllImport("fastdeploy.dll", EntryPoint = "FD_C_PSSDetWrapperPredict")] + private static extern bool + FD_C_PSSDetWrapperPredict(IntPtr fd_pssdet_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PSSDetWrapperInitialized")] + private static extern bool + FD_C_PSSDetWrapperInitialized(IntPtr fd_c_pssdet_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_PSSDetWrapperBatchPredict")] + private static extern bool + FD_C_PSSDetWrapperBatchPredict(IntPtr fd_c_pssdet_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// RetinaNet + +public class RetinaNet { + + public RetinaNet(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_retinanet_wrapper = + FD_C_CreatesRetinaNetWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~RetinaNet() { FD_C_DestroyRetinaNetWrapper(fd_retinanet_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! 
FD_C_RetinaNetWrapperPredict(fd_retinanet_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_RetinaNetWrapperBatchPredict(fd_retinanet_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_RetinaNetWrapperInitialized(fd_retinanet_wrapper); + } + + // below are underlying C api + private IntPtr fd_retinanet_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesRetinaNetWrapper")] + private static extern IntPtr FD_C_CreatesRetinaNetWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyRetinaNetWrapper")] + private static extern void + FD_C_DestroyRetinaNetWrapper(IntPtr fd_retinanet_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_RetinaNetWrapperPredict")] + private static extern bool + FD_C_RetinaNetWrapperPredict(IntPtr fd_retinanet_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RetinaNetWrapperInitialized")] + private static extern bool + FD_C_RetinaNetWrapperInitialized(IntPtr fd_c_retinanet_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_RetinaNetWrapperBatchPredict")] + private static extern bool + FD_C_RetinaNetWrapperBatchPredict(IntPtr fd_c_retinanet_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// FCOS + +public class FCOS { + + public FCOS(string model_file, string 
params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_fcos_wrapper = + FD_C_CreatesFCOSWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~FCOS() { FD_C_DestroyFCOSWrapper(fd_fcos_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_FCOSWrapperPredict(fd_fcos_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_FCOSWrapperBatchPredict(fd_fcos_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_FCOSWrapperInitialized(fd_fcos_wrapper); + } + + // below are underlying C api + private IntPtr fd_fcos_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesFCOSWrapper")] + private static extern IntPtr FD_C_CreatesFCOSWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyFCOSWrapper")] + private static extern void + FD_C_DestroyFCOSWrapper(IntPtr fd_fcos_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_FCOSWrapperPredict")] + private static extern bool + FD_C_FCOSWrapperPredict(IntPtr fd_fcos_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_FCOSWrapperInitialized")] 
+ private static extern bool + FD_C_FCOSWrapperInitialized(IntPtr fd_c_fcos_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_FCOSWrapperBatchPredict")] + private static extern bool + FD_C_FCOSWrapperBatchPredict(IntPtr fd_c_fcos_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// TTFNet + +public class TTFNet { + + public TTFNet(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_ttfnet_wrapper = + FD_C_CreatesTTFNetWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~TTFNet() { FD_C_DestroyTTFNetWrapper(fd_ttfnet_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_TTFNetWrapperPredict(fd_ttfnet_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_TTFNetWrapperBatchPredict(fd_ttfnet_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_TTFNetWrapperInitialized(fd_ttfnet_wrapper); + } + + // below are underlying C api + private IntPtr fd_ttfnet_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesTTFNetWrapper")] + private static extern IntPtr FD_C_CreatesTTFNetWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyTTFNetWrapper")] + private static extern void + FD_C_DestroyTTFNetWrapper(IntPtr fd_ttfnet_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_TTFNetWrapperPredict")] + private static extern bool + FD_C_TTFNetWrapperPredict(IntPtr fd_ttfnet_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + 
[DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_TTFNetWrapperInitialized")] + private static extern bool + FD_C_TTFNetWrapperInitialized(IntPtr fd_c_ttfnet_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_TTFNetWrapperBatchPredict")] + private static extern bool + FD_C_TTFNetWrapperBatchPredict(IntPtr fd_c_ttfnet_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// TOOD + +public class TOOD { + + public TOOD(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_tood_wrapper = + FD_C_CreatesTOODWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~TOOD() { FD_C_DestroyTOODWrapper(fd_tood_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_TOODWrapperPredict(fd_tood_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_TOODWrapperBatchPredict(fd_tood_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_TOODWrapperInitialized(fd_tood_wrapper); + } + + // below are underlying C api + private IntPtr fd_tood_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesTOODWrapper")] + private static extern IntPtr FD_C_CreatesTOODWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyTOODWrapper")] + private static extern void + FD_C_DestroyTOODWrapper(IntPtr fd_tood_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_TOODWrapperPredict")] + private static extern bool + FD_C_TOODWrapperPredict(IntPtr fd_tood_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private 
static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_TOODWrapperInitialized")] + private static extern bool + FD_C_TOODWrapperInitialized(IntPtr fd_c_tood_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_TOODWrapperBatchPredict")] + private static extern bool + FD_C_TOODWrapperBatchPredict(IntPtr fd_c_tood_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + +// GFL + +public class GFL { + + public GFL(string model_file, string params_file, string config_file, + RuntimeOption custom_option = null, + ModelFormat model_format = ModelFormat.PADDLE) { + if (custom_option == null) { + custom_option = new RuntimeOption(); + } + fd_gfl_wrapper = + FD_C_CreatesGFLWrapper(model_file, params_file, config_file, + custom_option.GetWrapperPtr(), model_format); + } + + ~GFL() { FD_C_DestroyGFLWrapper(fd_gfl_wrapper); } + + public DetectionResult Predict(Mat img) { + FD_DetectionResult fd_detection_result = new FD_DetectionResult(); + if(! FD_C_GFLWrapperPredict(fd_gfl_wrapper, img.CvPtr, + ref fd_detection_result)) + { + return null; + } // predict + + DetectionResult detection_result = + ConvertResult.ConvertCResultToDetectionResult(fd_detection_result); + return detection_result; + } + + public List BatchPredict(List imgs){ + FD_OneDimMat imgs_in = new FD_OneDimMat(); + imgs_in.size = (nuint)imgs.Count; + // Copy data to unmanaged memory + IntPtr[] mat_ptrs = new IntPtr[imgs_in.size]; + for(int i=0;i < (int)imgs.Count; i++){ + mat_ptrs[i] = imgs[i].CvPtr; + } + int size = Marshal.SizeOf(new IntPtr()) * (int)imgs_in.size; + imgs_in.data = Marshal.AllocHGlobal(size); + Marshal.Copy(mat_ptrs, 0, imgs_in.data, + mat_ptrs.Length); + FD_OneDimDetectionResult fd_detection_result_array = new FD_OneDimDetectionResult(); + if(!FD_C_GFLWrapperBatchPredict(fd_gfl_wrapper, ref imgs_in, ref fd_detection_result_array)){ + return null; + } + List results_out = new List(); + for(int i=0;i < (int)imgs.Count; i++){ + FD_DetectionResult fd_detection_result = (FD_DetectionResult)Marshal.PtrToStructure( + fd_detection_result_array.data + i * Marshal.SizeOf(new FD_DetectionResult()), + typeof(FD_DetectionResult)); + results_out.Add(ConvertResult.ConvertCResultToDetectionResult(fd_detection_result)); + } + return results_out; + } + + public bool Initialized() { + return FD_C_GFLWrapperInitialized(fd_gfl_wrapper); + } + + // below are underlying C api + private IntPtr fd_gfl_wrapper; + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_CreatesGFLWrapper")] + private static extern IntPtr FD_C_CreatesGFLWrapper( + string model_file, string params_file, string config_file, + IntPtr fd_runtime_option_wrapper, ModelFormat model_format); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyGFLWrapper")] + 
private static extern void + FD_C_DestroyGFLWrapper(IntPtr fd_gfl_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_GFLWrapperPredict")] + private static extern bool + FD_C_GFLWrapperPredict(IntPtr fd_gfl_wrapper, IntPtr img, + ref FD_DetectionResult fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapper")] + private static extern IntPtr FD_C_CreateDetectionResultWrapper(); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DestroyDetectionResultWrapper")] + private static extern void + FD_C_DestroyDetectionResultWrapper(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_DestroyDetectionResult")] + private static extern void + FD_C_DestroyDetectionResult(IntPtr fd_detection_result); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_DetectionResultWrapperGetData")] + private static extern IntPtr + FD_C_DetectionResultWrapperGetData(IntPtr fd_detection_result_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_CreateDetectionResultWrapperFromData")] + private static extern IntPtr + FD_C_CreateDetectionResultWrapperFromData(IntPtr fd_detection_result); + + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_GFLWrapperInitialized")] + private static extern bool + FD_C_GFLWrapperInitialized(IntPtr fd_c_gfl_wrapper); + [DllImport("fastdeploy.dll", + EntryPoint = "FD_C_GFLWrapperBatchPredict")] + private static extern bool + FD_C_GFLWrapperBatchPredict(IntPtr fd_c_gfl_wrapper, + ref FD_OneDimMat imgs, + ref FD_OneDimDetectionResult results); +} + } } } \ No newline at end of file diff --git a/csharp/fastdeploy/vision/result.cs b/csharp/fastdeploy/vision/result.cs index 5b137bc86..e302674a2 100644 --- a/csharp/fastdeploy/vision/result.cs +++ b/csharp/fastdeploy/vision/result.cs @@ -37,7 +37,7 @@ public enum ResultType { HEADPOSE } -public struct Mask { +public class Mask { public List data; public List shape; public ResultType type; @@ -46,9 +46,24 @@ public struct Mask { this.shape = new List(); this.type = ResultType.MASK; } + + public override string ToString() { + string information = "Mask(" ; + int ndim = this.shape.Count; + for (int i = 0; i < ndim; i++) { + if (i < ndim - 1) { + information += this.shape[i].ToString() + ","; + } else { + information += this.shape[i].ToString(); + } + } + information += ")\n"; + return information; + } + } -public struct ClassifyResult { +public class ClassifyResult { public List label_ids; public List scores; public ResultType type; @@ -57,9 +72,24 @@ public struct ClassifyResult { this.scores = new List(); this.type = ResultType.CLASSIFY; } + + public string ToString() { + string information; + information = "ClassifyResult(\nlabel_ids: "; + for (int i = 0; i < label_ids.Count; i++) { + information = information + label_ids[i].ToString() + ", "; + } + information += "\nscores: "; + for (int i = 0; i < scores.Count; i++) { + information = information + scores[i].ToString() + ", "; + } + information += "\n)"; + return information; + + } } -public struct DetectionResult { +public class DetectionResult { public List boxes; public List scores; public List label_ids; @@ -74,6 +104,30 @@ public struct DetectionResult { this.contain_masks = false; this.type = ResultType.DETECTION; } + + + public string ToString() { + string information; + if (!contain_masks) { + information = "DetectionResult: [xmin, ymin, xmax, ymax, score, label_id]\n"; + } else { + information = + "DetectionResult: [xmin, ymin, xmax, ymax, score, label_id, mask_shape]\n"; + } + for (int i = 
0; i < boxes.Count; i++) { + information = information + boxes[i][0].ToString() + "," + + boxes[i][1].ToString() + ", " + boxes[i][2].ToString() + + ", " + boxes[i][3].ToString() + ", " + + scores[i].ToString() + ", " + label_ids[i].ToString(); + if (!contain_masks) { + information += "\n"; + } else { + information += ", " + masks[i].ToString(); + } + } + return information; + } + } public class ConvertResult { @@ -265,6 +319,39 @@ public class ConvertResult { detection_result.type = (ResultType)fd_detection_result.type; return detection_result; } + + + public static FD_OneDimArrayCstr + ConvertStringArrayToCOneDimArrayCstr(string[] strs){ + FD_OneDimArrayCstr fd_one_dim_cstr = new FD_OneDimArrayCstr(); + fd_one_dim_cstr.size = (nuint)strs.Length; + + // Copy data to unmanaged memory + FD_Cstr[] c_strs = new FD_Cstr[strs.Length]; + int size = Marshal.SizeOf(c_strs[0]) * c_strs.Length; + fd_one_dim_cstr.data = Marshal.AllocHGlobal(size); + for (int i = 0; i < strs.Length; i++) { + c_strs[i].size = (nuint)strs[i].Length; + c_strs[i].data = strs[i]; + Marshal.StructureToPtr( + c_strs[i], + fd_one_dim_cstr.data + i * Marshal.SizeOf(c_strs[0]), true); + } + return fd_one_dim_cstr; + } + + public static string[] + ConvertCOneDimArrayCstrToStringArray(FD_OneDimArrayCstr c_strs){ + string[] strs = new string[c_strs.size]; + for(int i=0; i<(int)c_strs.size; i++){ + FD_Cstr cstr = (FD_Cstr)Marshal.PtrToStructure( + c_strs.data + i * Marshal.SizeOf(new FD_Cstr()), + typeof(FD_Cstr)); + strs[i] = cstr.data; + } + return strs; + } + } } diff --git a/csharp/fastdeploy/vision/visualize.cs b/csharp/fastdeploy/vision/visualize.cs index 6ed5f168a..a2c83b3af 100644 --- a/csharp/fastdeploy/vision/visualize.cs +++ b/csharp/fastdeploy/vision/visualize.cs @@ -35,10 +35,34 @@ public class Visualize { return new Mat(result_ptr); } + + public static Mat VisDetection(Mat im, DetectionResult detection_result, + string[] labels, + float score_threshold = 0.0f, + int line_size = 1, float font_size = 0.5f) { + FD_DetectionResult fd_detection_result = + ConvertResult.ConvertDetectionResultToCResult(detection_result); + FD_OneDimArrayCstr labels_in = ConvertResult.ConvertStringArrayToCOneDimArrayCstr(labels); + IntPtr result_ptr = + FD_C_VisDetectionWithLabel(im.CvPtr, ref fd_detection_result, + ref labels_in, score_threshold, + line_size, font_size); + return new Mat(result_ptr); + } + + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_VisDetection")] private static extern IntPtr FD_C_VisDetection(IntPtr im, ref FD_DetectionResult fd_detection_result, float score_threshold, int line_size, float font_size); + + + [DllImport("fastdeploy.dll", EntryPoint = "FD_C_VisDetectionWithLabel")] + private static extern IntPtr + FD_C_VisDetectionWithLabel(IntPtr im, ref FD_DetectionResult fd_detection_result, + ref FD_OneDimArrayCstr labels, + float score_threshold, int line_size, float font_size); + } } From ee41944f472d5f12e44fb1a850ce304e6375107e Mon Sep 17 00:00:00 2001 From: Jack Zhou Date: Fri, 17 Feb 2023 14:05:04 +0800 Subject: [PATCH 40/41] [Serving] Update trt backend to 8.5.2.2 (#1326) * update trt backend * Add trt version args * Add cuda cudnn version --- serving/Dockerfile | 4 +- serving/Dockerfile_CUDA_11_4_TRT_8_4 | 59 ++++++++++++++++++++++++++++ serving/docs/EN/compile-en.md | 10 +++++ serving/docs/zh_CN/compile.md | 12 +++++- serving/scripts/build.sh | 29 +++++++++++--- 5 files changed, 105 insertions(+), 9 deletions(-) create mode 100644 serving/Dockerfile_CUDA_11_4_TRT_8_4 diff --git a/serving/Dockerfile 
b/serving/Dockerfile index 22087b1c8..9b10cac1a 100644 --- a/serving/Dockerfile +++ b/serving/Dockerfile @@ -23,7 +23,7 @@ COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib COPY --from=full /opt/tritonserver/include /opt/tritonserver/include COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python -COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5 +COPY serving/TensorRT-8.5.2.2 /opt/TensorRT-8.5.2.2 ENV TZ=Asia/Shanghai \ DEBIAN_FRONTEND=noninteractive \ @@ -55,5 +55,5 @@ RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddle COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/ COPY build/fastdeploy_install /opt/fastdeploy/ -ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH" +ENV LD_LIBRARY_PATH="/opt/TensorRT-8.5.2.2/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH" ENV PATH="/opt/tritonserver/bin:$PATH" diff --git a/serving/Dockerfile_CUDA_11_4_TRT_8_4 b/serving/Dockerfile_CUDA_11_4_TRT_8_4 new file mode 100644 index 000000000..22087b1c8 --- /dev/null +++ b/serving/Dockerfile_CUDA_11_4_TRT_8_4 @@ -0,0 +1,59 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
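+
+# Alternative serving image definition that keeps TensorRT 8.4.1.5 on the
+# tritonserver:21.10 (CUDA 11.4) base image. The default serving/Dockerfile
+# above has moved to TensorRT 8.5.2.2; build with this file when the older
+# TensorRT stack is still required.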
+ +ARG http_proxy +ARG https_proxy + +FROM nvcr.io/nvidia/tritonserver:21.10-py3 as full +FROM nvcr.io/nvidia/tritonserver:21.10-py3-min + +COPY --from=full /opt/tritonserver/bin/tritonserver /opt/tritonserver/bin/fastdeployserver +COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib +COPY --from=full /opt/tritonserver/include /opt/tritonserver/include +COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python + +COPY serving/TensorRT-8.4.1.5 /opt/TensorRT-8.4.1.5 + +ENV TZ=Asia/Shanghai \ + DEBIAN_FRONTEND=noninteractive \ + DCGM_VERSION=2.2.9 \ + http_proxy=$http_proxy \ + https_proxy=$http_proxy + +RUN apt-get update \ + && apt-key del 7fa2af80 \ + && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \ + && dpkg -i cuda-keyring_1.0-1_all.deb \ + && apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub \ + && apt-get update && apt-get install -y --no-install-recommends datacenter-gpu-manager=1:2.2.9 + +RUN apt-get update \ + && apt-get install -y --no-install-recommends libre2-5 libb64-0d python3 python3-pip libarchive-dev ffmpeg libsm6 libxext6 \ + && python3 -m pip install -U pip \ + && python3 -m pip install paddlenlp fast-tokenizer-python + +COPY python/dist/*.whl /opt/fastdeploy/ +RUN python3 -m pip install /opt/fastdeploy/*.whl \ + && rm -rf /opt/fastdeploy/*.whl + +# unset proxy +ENV http_proxy= +ENV https_proxy= +RUN python3 -m pip install paddlepaddle-gpu==2.4.1.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html + +COPY serving/build/libtriton_fastdeploy.so /opt/tritonserver/backends/fastdeploy/ +COPY build/fastdeploy_install /opt/fastdeploy/ + +ENV LD_LIBRARY_PATH="/opt/TensorRT-8.4.1.5/lib/:/opt/fastdeploy/lib:/opt/fastdeploy/third_libs/install/onnxruntime/lib:/opt/fastdeploy/third_libs/install/paddle2onnx/lib:/opt/fastdeploy/third_libs/install/tensorrt/lib:/opt/fastdeploy/third_libs/install/paddle_inference/paddle/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mkldnn/lib:/opt/fastdeploy/third_libs/install/paddle_inference/third_party/install/mklml/lib:/opt/fastdeploy/third_libs/install/openvino/runtime/lib:$LD_LIBRARY_PATH" +ENV PATH="/opt/tritonserver/bin:$PATH" diff --git a/serving/docs/EN/compile-en.md b/serving/docs/EN/compile-en.md index b023487a7..20865eeb2 100644 --- a/serving/docs/EN/compile-en.md +++ b/serving/docs/EN/compile-en.md @@ -18,6 +18,16 @@ cd ../ docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile . ``` +The default version of TensorRT is 8.5.2.2. If you need to change the version, you can run the following commands. + +``` +cd serving +bash scripts/build.sh -tv 8.4.1.5 + +cd ../ +docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 . +``` + For example, create an GPU image based on FastDeploy v1.0.3 and ubuntu 20.04,cuda11.2 environment ``` # Enter the serving directory and execute the script to compile the FastDeploy and serving backend diff --git a/serving/docs/zh_CN/compile.md b/serving/docs/zh_CN/compile.md index 0ed4a93a2..8961149e1 100644 --- a/serving/docs/zh_CN/compile.md +++ b/serving/docs/zh_CN/compile.md @@ -15,7 +15,17 @@ bash scripts/build.sh # 退出到FastDeploy主目录,制作镜像 # x.y.z为FastDeploy版本号,可根据情况自己确定。比如: 1.0.3 cd ../ -docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 -f serving/Dockerfile . 
+docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile . +``` + +目前默认 TensorRT 版本为 8.5.2.2,如果需要切换 TensorRT 版本,则可执行以下编译命令: + +``` +cd serving +bash scripts/build.sh -tv 8.4.1.5 + +cd ../ +docker build -t paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.5-21.10 -f serving/Dockerfile_CUDA_11_4_TRT_8_4 . ``` 比如在ubuntu 20.04,cuda11.2环境下制作基于FastDeploy v1.0.3的GPU镜像 diff --git a/serving/scripts/build.sh b/serving/scripts/build.sh index 1038fe030..bd819f7bd 100644 --- a/serving/scripts/build.sh +++ b/serving/scripts/build.sh @@ -33,6 +33,9 @@ do -hs|--https_proxy) https_proxy="$2" shift;; + -tv|--trt_version) + trt_version="$2" + shift;; --) shift break;; @@ -50,6 +53,20 @@ fi if [ $WITH_GPU == "ON" ]; then +if [ -z $trt_version ]; then + # The optional value of trt_version: ["8.4.1.5", "8.5.2.2"] + trt_version="8.5.2.2" +fi + +if [ $trt_version == "8.5.2.2" ] +then + cuda_version="11.8" + cudnn_version="8.6" +else + cuda_version="11.6" + cudnn_version="8.4" +fi + echo "start build FD GPU library" if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then @@ -58,10 +75,10 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then rm -rf cmake-3.18.6-Linux-x86_64.tar.gz fi -if [ ! -d "./TensorRT-8.4.1.5/" ]; then - wget https://fastdeploy.bj.bcebos.com/third_libs/TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz - tar -zxvf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz - rm -rf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz +if [ ! -d "./TensorRT-${trt_version}/" ]; then + wget https://fastdeploy.bj.bcebos.com/resource/TensorRT/TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz + tar -zxvf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz + rm -rf TensorRT-${trt_version}.Linux.x86_64-gnu.cuda-${cuda_version}.cudnn${cudnn_version}.tar.gz fi nvidia-docker run -i --rm --name ${docker_name} \ @@ -78,7 +95,7 @@ nvidia-docker run -i --rm --name ${docker_name} \ export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH; export WITH_GPU=ON; export ENABLE_TRT_BACKEND=OFF; - export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/; + export TRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/; export ENABLE_ORT_BACKEND=OFF; export ENABLE_PADDLE_BACKEND=OFF; export ENABLE_OPENVINO_BACKEND=OFF; @@ -88,7 +105,7 @@ nvidia-docker run -i --rm --name ${docker_name} \ python setup.py bdist_wheel; cd /workspace/fastdeploy; rm -rf build; mkdir -p build;cd build; - cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime; + cmake .. 
-DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy_install -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-${trt_version}/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE2ONNX=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime; make -j`nproc`; make install; cd /workspace/fastdeploy/serving; From db471c3466a11890690c40c3be182b9f622da1fa Mon Sep 17 00:00:00 2001 From: Jason Date: Fri, 17 Feb 2023 14:32:35 +0800 Subject: [PATCH 41/41] [Other] Optimize poros backend (#1331) * Optimize poros backend * Fix pybind error --------- Co-authored-by: root --- .../runtime/cpp/infer_torchscript_poros.cc | 6 +- fastdeploy/pybind/runtime.cc | 2 +- fastdeploy/runtime/backends/backend.h | 57 ++++++++++--------- .../runtime/backends/poros/poros_backend.h | 14 +++++ fastdeploy/runtime/runtime.cc | 27 +++++---- fastdeploy/runtime/runtime.h | 5 +- 6 files changed, 66 insertions(+), 45 deletions(-) diff --git a/examples/runtime/cpp/infer_torchscript_poros.cc b/examples/runtime/cpp/infer_torchscript_poros.cc index d9bf4ebad..0257513dc 100644 --- a/examples/runtime/cpp/infer_torchscript_poros.cc +++ b/examples/runtime/cpp/infer_torchscript_poros.cc @@ -84,11 +84,13 @@ int main(int argc, char* argv[]) { runtime_option.SetModelPath(model_file, "", fd::ModelFormat::TORCHSCRIPT); runtime_option.UsePorosBackend(); runtime_option.UseGpu(0); - runtime_option.is_dynamic = true; // Compile runtime std::unique_ptr runtime = std::unique_ptr(new fd::Runtime()); + + runtime->Init(runtime_option); + if (!runtime->Compile(prewarm_datas, runtime_option)) { std::cerr << "--- Init FastDeploy Runitme Failed! " << "\n--- Model: " << model_file << std::endl; @@ -114,4 +116,4 @@ int main(int argc, char* argv[]) { output_tensors[0].PrintInfo(); return 0; -} \ No newline at end of file +} diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc index 408c3ced2..a47181dff 100644 --- a/fastdeploy/pybind/runtime.cc +++ b/fastdeploy/pybind/runtime.cc @@ -51,7 +51,7 @@ void BindRuntime(pybind11::module& m) { warm_datas[i][j].nbytes()); } } - return self.Compile(warm_tensors, _option); + return self.Compile(warm_tensors); }) .def("infer", [](Runtime& self, std::map& data) { diff --git a/fastdeploy/runtime/backends/backend.h b/fastdeploy/runtime/backends/backend.h index 802db6fa1..7566806e5 100644 --- a/fastdeploy/runtime/backends/backend.h +++ b/fastdeploy/runtime/backends/backend.h @@ -58,7 +58,10 @@ class BaseBackend { virtual bool Initialized() const { return initialized_; } virtual bool Init(const RuntimeOption& option) { - FDERROR << "Not Implement Yet." << std::endl; + FDERROR << "Not Implement for " + << option.backend << " in " + << option.device << "." + << std::endl; return false; } @@ -89,59 +92,59 @@ class BaseBackend { return nullptr; } - benchmark::BenchmarkOption benchmark_option_; - benchmark::BenchmarkResult benchmark_result_; + benchmark::BenchmarkOption benchmark_option_; + benchmark::BenchmarkResult benchmark_result_; }; -/** \brief Macros for Runtime benchmark profiling. - * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN' - * indicates that the least number of times the loop +/** \brief Macros for Runtime benchmark profiling. + * The param 'base_loop' for 'RUNTIME_PROFILE_LOOP_BEGIN' + * indicates that the least number of times the loop * will repeat when profiling mode is not enabled. 
- * In most cases, the value should be 1, i.e., results are - * obtained by running the inference process once, when - * the profile mode is turned off, such as ONNX Runtime, - * OpenVINO, TensorRT, Paddle Inference, Paddle Lite, - * RKNPU2, SOPHGO etc. - * + * In most cases, the value should be 1, i.e., results are + * obtained by running the inference process once, when + * the profile mode is turned off, such as ONNX Runtime, + * OpenVINO, TensorRT, Paddle Inference, Paddle Lite, + * RKNPU2, SOPHGO etc. + * * example code @code - * // OpenVINOBackend::Infer + * // OpenVINOBackend::Infer * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN - * // do something .... + * // do something .... * RUNTIME_PROFILE_LOOP_BEGIN(1) - * // The codes which wrapped by 'BEGIN(1) ~ END' scope + * // The codes which wrapped by 'BEGIN(1) ~ END' scope * // will only run once when profiling mode is not enabled. - * request_.infer(); + * request_.infer(); * RUNTIME_PROFILE_LOOP_END - * // do something .... + * // do something .... * RUNTIME_PROFILE_LOOP_H2D_D2H_END - * + * * @endcode In this case, No global variables inside a function - * are wrapped by BEGIN and END, which may be required for + * are wrapped by BEGIN and END, which may be required for * subsequent tasks. But, some times we need to set 'base_loop' * as 0, such as POROS. - * + * * * example code @code * // PorosBackend::Infer * RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN - * // do something .... + * // do something .... * RUNTIME_PROFILE_LOOP_BEGIN(0) // set 'base_loop' as 0 - * // The codes which wrapped by 'BEGIN(0) ~ END' scope + * // The codes which wrapped by 'BEGIN(0) ~ END' scope * // will not run when profiling mode is not enabled. - * auto poros_outputs = _poros_module->forward(poros_inputs); + * auto poros_outputs = _poros_module->forward(poros_inputs); * RUNTIME_PROFILE_LOOP_END * // Run another inference beyond the scope of 'BEGIN ~ END' * // to get valid outputs for subsequent tasks. - * auto poros_outputs = _poros_module->forward(poros_inputs); + * auto poros_outputs = _poros_module->forward(poros_inputs); * // do something .... will use 'poros_outputs' ... * if (poros_outputs.isTensor()) { * // ... * } * RUNTIME_PROFILE_LOOP_H2D_D2H_END - * + * * @endcode In this case, 'poros_outputs' inside a function - * are wrapped by BEGIN and END, which may be required for + * are wrapped by BEGIN and END, which may be required for * subsequent tasks. So, we set 'base_loop' as 0 and lanuch - * another infer to get the valid outputs beyond the scope + * another infer to get the valid outputs beyond the scope * of 'BEGIN ~ END' for subsequent tasks. */ diff --git a/fastdeploy/runtime/backends/poros/poros_backend.h b/fastdeploy/runtime/backends/poros/poros_backend.h index 0d01a6884..91268efdb 100755 --- a/fastdeploy/runtime/backends/poros/poros_backend.h +++ b/fastdeploy/runtime/backends/poros/poros_backend.h @@ -51,6 +51,20 @@ class PorosBackend : public BaseBackend { void BuildOption(const PorosBackendOption& option); + bool Init(const RuntimeOption& option) { + if (!(Supported(option.model_format, Backend::POROS) + && Supported(option.device, Backend::POROS))) { + return false; + } + if (option.model_from_memory_) { + FDERROR << "Poros backend doesn't support load model " + << "from memory, please load model from disk." 
+ << std::endl; + return false; + } + return true; + } + bool Compile(const std::string& model_file, std::vector>& prewarm_tensors, const PorosBackendOption& option = PorosBackendOption()); diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc index ceb7c590c..0e6eecf32 100644 --- a/fastdeploy/runtime/runtime.cc +++ b/fastdeploy/runtime/runtime.cc @@ -417,25 +417,28 @@ Runtime* Runtime::Clone(void* stream, int device_id) { return runtime; } -// only for poros backend -bool Runtime::Compile(std::vector>& prewarm_tensors, - const RuntimeOption& _option) { +void Runtime::CreatePorosBackend() { +#ifdef ENABLE_POROS_BACKEND + backend_ = utils::make_unique(); + FDASSERT(backend_->Init(option), "Failed to initialize Poros backend."); +#else + FDASSERT(false, + "PorosBackend is not available, please compiled with " + "ENABLE_POROS_BACKEND=ON."); +#endif + FDINFO << "Runtime initialized with Backend::POROS in " << option.device + << "." << std::endl; +} + +// only for poros backend +bool Runtime::Compile(std::vector>& prewarm_tensors) { #ifdef ENABLE_POROS_BACKEND - FDASSERT( - option.model_format == ModelFormat::TORCHSCRIPT, - "PorosBackend only support model format of ModelFormat::TORCHSCRIPT."); - if (option.device != Device::CPU && option.device != Device::GPU) { - FDERROR << "PorosBackend only supports CPU/GPU, but now its " - << option.device << "." << std::endl; - return false; - } option.poros_option.device = option.device; option.poros_option.device_id = option.device_id; option.poros_option.enable_fp16 = option.trt_option.enable_fp16; option.poros_option.max_batch_size = option.trt_option.max_batch_size; option.poros_option.max_workspace_size = option.trt_option.max_workspace_size; - backend_ = utils::make_unique(); auto casted_backend = dynamic_cast(backend_.get()); FDASSERT( casted_backend->Compile(option.model_file, prewarm_tensors, diff --git a/fastdeploy/runtime/runtime.h b/fastdeploy/runtime/runtime.h index 4d045684e..e34b520f8 100755 --- a/fastdeploy/runtime/runtime.h +++ b/fastdeploy/runtime/runtime.h @@ -99,11 +99,9 @@ struct FASTDEPLOY_DECL Runtime { /** \brief Compile TorchScript Module, only for Poros backend * * \param[in] prewarm_tensors Prewarm datas for compile - * \param[in] _option Runtime option * \return true if compile successed, otherwise false */ - bool Compile(std::vector>& prewarm_tensors, - const RuntimeOption& _option); + bool Compile(std::vector>& prewarm_tensors); /** \brief Get profile time of Runtime after the profile process is done. */ double GetProfileTime() { @@ -118,6 +116,7 @@ struct FASTDEPLOY_DECL Runtime { void CreateLiteBackend(); void CreateRKNPU2Backend(); void CreateSophgoNPUBackend(); + void CreatePorosBackend(); std::unique_ptr backend_; std::vector input_tensors_; std::vector output_tensors_;
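
The poros patch above changes the compile entry points: RuntimeOption is now consumed once by Runtime::Init(), which dispatches to the new CreatePorosBackend(), and Compile() only receives the prewarm tensors. Below is a minimal sketch of the resulting call sequence; the model path, tensor shape, and buffer handling are illustrative assumptions, not taken from the patch.

```cpp
// Sketch only: the updated Poros flow where RuntimeOption goes to Init() and
// Compile() takes just the prewarm tensors. Paths and shapes are illustrative.
#include <vector>

#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

int main() {
  fd::RuntimeOption option;
  option.SetModelPath("std_resnet50_script.pt", "", fd::ModelFormat::TORCHSCRIPT);
  option.UsePorosBackend();
  option.UseGpu(0);

  // One prewarm batch holding a single FP32 input of shape [1, 3, 224, 224].
  std::vector<float> buffer(1 * 3 * 224 * 224, 0.0f);
  std::vector<std::vector<fd::FDTensor>> prewarm_datas(1);
  prewarm_datas[0].emplace_back();
  prewarm_datas[0][0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32,
                                      buffer.data());

  fd::Runtime runtime;
  if (!runtime.Init(option)) return -1;            // selects PorosBackend from option
  if (!runtime.Compile(prewarm_datas)) return -1;  // new single-argument Compile
  // ...fill real inputs and call runtime.Infer() as before.
  return 0;
}
```

With this split, backend selection and option validation happen in Init(), so an unsupported configuration (for example, a Poros model loaded from memory) is rejected before any prewarm data is prepared.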