From f91a6126eee74f5cf918b6fe57455160529926ea Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Tue, 29 Nov 2022 08:26:27 +0000 Subject: [PATCH 01/30] Update keypointdetection result docs --- docs/api/vision_results/keypointdetection_result.md | 8 ++------ docs/api_docs/python/vision_results_cn.md | 3 +-- docs/api_docs/python/vision_results_en.md | 3 +-- tests/models/test_ppyoloe.py | 10 +++++++--- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/docs/api/vision_results/keypointdetection_result.md b/docs/api/vision_results/keypointdetection_result.md index a47057ad8..645880d23 100644 --- a/docs/api/vision_results/keypointdetection_result.md +++ b/docs/api/vision_results/keypointdetection_result.md @@ -16,16 +16,13 @@ struct KeyPointDetectionResult { }; ``` -- **keypoints**: 成员变量,表示识别到的目标行为的关键点坐标。`keypoints.size()= N * J * 2`, +- **keypoints**: 成员变量,表示识别到的目标行为的关键点坐标。`keypoints.size()= N * J`, - `N`:图片中的目标数量 - `J`:num_joints(一个目标的关键点数量) - - `3`:坐标信息[x, y] - **scores**: 成员变量,表示识别到的目标行为的关键点坐标的置信度。`scores.size()= N * J` - `N`:图片中的目标数量 - `J`:num_joints(一个目标的关键点数量) - **num_joints**: 成员变量,一个目标的关键点数量 - -- **num_joints**: 成员变量,一个目标的关键点数量 - **Clear()**: 成员函数,用于清除结构体中存储的结果 - **Str()**: 成员函数,将结构体中的信息以字符串形式输出(用于Debug) @@ -34,10 +31,9 @@ struct KeyPointDetectionResult { `fastdeploy.vision.KeyPointDetectionResult` - **keypoints**(list of list(float)): 成员变量,表示识别到的目标行为的关键点坐标。 - `keypoints.size()= N * J * 2` + `keypoints.size()= N * J` `N`:图片中的目标数量 `J`:num_joints(关键点数量) - `3`:坐标信息[x, y, conf] - **scores**(list of float): 成员变量,表示识别到的目标行为的关键点坐标的置信度。 `scores.size()= N * J` `N`:图片中的目标数量 diff --git a/docs/api_docs/python/vision_results_cn.md b/docs/api_docs/python/vision_results_cn.md index 19b2a6662..e9dc00621 100644 --- a/docs/api_docs/python/vision_results_cn.md +++ b/docs/api_docs/python/vision_results_cn.md @@ -46,10 +46,9 @@ API:`fastdeploy.vision.FaceDetectionResult` , 该结果返回: KeyPointDetectionResult 代码定义在`fastdeploy/vision/common/result.h`中,用于表明图像中目标行为的各个关键点坐标和置信度。 API:`fastdeploy.vision.KeyPointDetectionResult` , 该结果返回: -- **keypoints**(list of list(float)): 成员变量,表示识别到的目标行为的关键点坐标。`keypoints.size()= N * J * 2`, +- **keypoints**(list of list(float)): 成员变量,表示识别到的目标行为的关键点坐标。`keypoints.size()= N * J`, - `N`:图片中的目标数量 - `J`:num_joints(一个目标的关键点数量) - - `3`:坐标信息[x, y] - **scores**(list of float): 成员变量,表示识别到的目标行为的关键点坐标的置信度。`scores.size()= N * J` - `N`:图片中的目标数量 - `J`:num_joints(一个目标的关键点数量) diff --git a/docs/api_docs/python/vision_results_en.md b/docs/api_docs/python/vision_results_en.md index dc02d0b4f..f297331d1 100644 --- a/docs/api_docs/python/vision_results_en.md +++ b/docs/api_docs/python/vision_results_en.md @@ -49,10 +49,9 @@ API: `fastdeploy.vision.FaceDetectionResult`, The FaceDetectionResult will retur The KeyPointDetectionResult code is defined in `fastdeploy/vision/common/result.h` and is used to indicate the coordinates and confidence of each keypoint of the target behavior in the image. API:`fastdeploy.vision.KeyPointDetectionResult`, The KeyPointDetectionResult will return: -- **keypoints**(list of list(float)): Member variable, representing the key point coordinates of the identified target behavior. `keypoints.size()= N * J * 2`, +- **keypoints**(list of list(float)): Member variable, representing the key point coordinates of the identified target behavior. 
`keypoints.size()= N * J`, - `N`: number of objects in the picture - `J`: num_joints(number of keypoints for a target) - - `3`: 坐标信息[x, y] - **scores**(list of float): Member variable, representing the confidence of the keypoint coordinates of the recognized target behavior. `scores.size()= N * J` - `N`: number of objects in the picture - `J`: num_joints(number of keypoints for a target) diff --git a/tests/models/test_ppyoloe.py b/tests/models/test_ppyoloe.py index 08b19bf91..a479f90e1 100755 --- a/tests/models/test_ppyoloe.py +++ b/tests/models/test_ppyoloe.py @@ -60,6 +60,7 @@ def test_detection_ppyoloe(): assert diff_label_ids[scores > score_threshold].max( ) < 1e-04, "There's diff in label_ids." + def test_detection_ppyoloe1(): model_url = "https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz" input_url1 = "https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg" @@ -75,15 +76,18 @@ def test_detection_ppyoloe1(): preprocessor = fd.vision.detection.PaddleDetPreprocessor(config_file) postprocessor = fd.vision.detection.PaddleDetPostprocessor() - + rc.test_option.set_model_path(model_file, params_file) - runtime = fd.Runtime(rc.test_option); + runtime = fd.Runtime(rc.test_option) # compare diff im1 = cv2.imread("./resources/000000014439.jpg") for i in range(2): input_tensors = preprocessor.run([im1]) - output_tensors = runtime.infer({"image": input_tensors[0], "scale_factor": input_tensors[1]}) + output_tensors = runtime.infer({ + "image": input_tensors[0], + "scale_factor": input_tensors[1] + }) results = postprocessor.run(output_tensors) result = results[0] with open("resources/ppyoloe_baseline.pkl", "rb") as f: From 2f6f6977d76fff2176e46c796ad3551c5b911c7b Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Sun, 11 Dec 2022 14:10:16 +0000 Subject: [PATCH 02/30] Update im.copy() to im in examples --- examples/vision/classification/resnet/python/infer.py | 2 +- examples/vision/classification/yolov5cls/python/infer.py | 2 +- examples/vision/detection/nanodet_plus/python/infer.py | 2 +- examples/vision/detection/scaledyolov4/python/infer.py | 2 +- examples/vision/detection/yolor/python/infer.py | 2 +- examples/vision/detection/yolov5/python/infer.py | 2 +- examples/vision/detection/yolov5/quantize/python/infer.py | 2 +- examples/vision/detection/yolov5lite/python/infer.py | 2 +- examples/vision/detection/yolov6/python/infer.py | 2 +- examples/vision/detection/yolov6/quantize/python/infer.py | 2 +- examples/vision/detection/yolov7/python/infer.py | 2 +- examples/vision/detection/yolov7/quantize/python/infer.py | 2 +- examples/vision/detection/yolov7end2end_ort/python/infer.py | 2 +- examples/vision/detection/yolov7end2end_trt/python/infer.py | 2 +- examples/vision/detection/yolox/python/infer.py | 2 +- examples/vision/facealign/face_landmark_1000/python/infer.py | 2 +- examples/vision/facealign/pfld/python/infer.py | 2 +- examples/vision/facealign/pipnet/python/infer.py | 2 +- examples/vision/facedet/retinaface/python/infer.py | 2 +- examples/vision/facedet/scrfd/python/infer.py | 2 +- examples/vision/facedet/scrfd/rknpu2/python/infer.py | 2 +- examples/vision/facedet/ultraface/python/infer.py | 2 +- examples/vision/facedet/yolov5face/python/infer.py | 2 +- examples/vision/headpose/fsanet/python/infer.py | 2 +- examples/vision/matting/modnet/python/infer.py | 2 +- examples/vision/matting/ppmatting/python/infer.py | 2 +- examples/vision/matting/rvm/python/infer.py | 4 ++-- examples/vision/segmentation/paddleseg/python/infer.py | 2 
+- .../vision/segmentation/paddleseg/quantize/python/infer.py | 2 +- examples/vision/segmentation/paddleseg/rknpu2/python/infer.py | 2 +- 30 files changed, 31 insertions(+), 31 deletions(-) diff --git a/examples/vision/classification/resnet/python/infer.py b/examples/vision/classification/resnet/python/infer.py index b8b268f3a..ba22304fc 100644 --- a/examples/vision/classification/resnet/python/infer.py +++ b/examples/vision/classification/resnet/python/infer.py @@ -46,5 +46,5 @@ model = fd.vision.classification.ResNet( args.model, runtime_option=runtime_option) # 预测图片分类结果 im = cv2.imread(args.image) -result = model.predict(im.copy(), args.topk) +result = model.predict(im, args.topk) print(result) diff --git a/examples/vision/classification/yolov5cls/python/infer.py b/examples/vision/classification/yolov5cls/python/infer.py index 576db32f2..55974a764 100644 --- a/examples/vision/classification/yolov5cls/python/infer.py +++ b/examples/vision/classification/yolov5cls/python/infer.py @@ -47,5 +47,5 @@ model = fd.vision.classification.YOLOv5Cls( # 预测图片分类结果 im = cv2.imread(args.image) -result = model.predict(im.copy(), args.topk) +result = model.predict(im, args.topk) print(result) diff --git a/examples/vision/detection/nanodet_plus/python/infer.py b/examples/vision/detection/nanodet_plus/python/infer.py index 9e7bf184f..a60f3a3b4 100644 --- a/examples/vision/detection/nanodet_plus/python/infer.py +++ b/examples/vision/detection/nanodet_plus/python/infer.py @@ -52,7 +52,7 @@ if args.image is None: else: image = args.image im = cv2.imread(image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/scaledyolov4/python/infer.py b/examples/vision/detection/scaledyolov4/python/infer.py index 9d990d77b..cd23e84e3 100644 --- a/examples/vision/detection/scaledyolov4/python/infer.py +++ b/examples/vision/detection/scaledyolov4/python/infer.py @@ -52,7 +52,7 @@ if args.image is None: else: image = args.image im = cv2.imread(image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/yolor/python/infer.py b/examples/vision/detection/yolor/python/infer.py index d5ab3dd7f..4111fa0ae 100644 --- a/examples/vision/detection/yolor/python/infer.py +++ b/examples/vision/detection/yolor/python/infer.py @@ -54,7 +54,7 @@ else: image = args.image im = cv2.imread(image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/yolov5/python/infer.py b/examples/vision/detection/yolov5/python/infer.py index fdded06c9..462740e9c 100644 --- a/examples/vision/detection/yolov5/python/infer.py +++ b/examples/vision/detection/yolov5/python/infer.py @@ -52,7 +52,7 @@ if args.image is None: else: image = args.image im = cv2.imread(image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/yolov5/quantize/python/infer.py b/examples/vision/detection/yolov5/quantize/python/infer.py index da502fe93..996bc5419 100644 --- a/examples/vision/detection/yolov5/quantize/python/infer.py +++ b/examples/vision/detection/yolov5/quantize/python/infer.py @@ -71,7 +71,7 @@ model = fd.vision.detection.YOLOv5( # 预测图片检测结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/yolov5lite/python/infer.py b/examples/vision/detection/yolov5lite/python/infer.py index 
b1ec69046..2242a33a5 100644 --- a/examples/vision/detection/yolov5lite/python/infer.py +++ b/examples/vision/detection/yolov5lite/python/infer.py @@ -52,7 +52,7 @@ if args.image is None: else: image = args.image im = cv2.imread(image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/yolov6/python/infer.py b/examples/vision/detection/yolov6/python/infer.py index 0a0a163fb..47bf3e689 100644 --- a/examples/vision/detection/yolov6/python/infer.py +++ b/examples/vision/detection/yolov6/python/infer.py @@ -52,7 +52,7 @@ if args.image is None: else: image = args.image im = cv2.imread(image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/yolov6/quantize/python/infer.py b/examples/vision/detection/yolov6/quantize/python/infer.py index da9fa3d97..77f46d4c2 100644 --- a/examples/vision/detection/yolov6/quantize/python/infer.py +++ b/examples/vision/detection/yolov6/quantize/python/infer.py @@ -71,7 +71,7 @@ model = fd.vision.detection.YOLOv6( # 预测图片检测结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/yolov7/python/infer.py b/examples/vision/detection/yolov7/python/infer.py index b0ece7e47..468b47dc5 100644 --- a/examples/vision/detection/yolov7/python/infer.py +++ b/examples/vision/detection/yolov7/python/infer.py @@ -52,7 +52,7 @@ if args.image is None: else: image = args.image im = cv2.imread(image) -result = model.predict(im.copy()) +result = model.predict(im) # 预测结果可视化 vis_im = fd.vision.vis_detection(im, result) diff --git a/examples/vision/detection/yolov7/quantize/python/infer.py b/examples/vision/detection/yolov7/quantize/python/infer.py index de84e4061..d07834c7c 100644 --- a/examples/vision/detection/yolov7/quantize/python/infer.py +++ b/examples/vision/detection/yolov7/quantize/python/infer.py @@ -71,7 +71,7 @@ model = fd.vision.detection.YOLOv7( # 预测图片检测结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/yolov7end2end_ort/python/infer.py b/examples/vision/detection/yolov7end2end_ort/python/infer.py index 2b812b71a..914facb59 100644 --- a/examples/vision/detection/yolov7end2end_ort/python/infer.py +++ b/examples/vision/detection/yolov7end2end_ort/python/infer.py @@ -44,7 +44,7 @@ model = fd.vision.detection.YOLOv7End2EndORT( # 预测图片检测结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/yolov7end2end_trt/python/infer.py b/examples/vision/detection/yolov7end2end_trt/python/infer.py index d179de06f..b1ad8ea0c 100644 --- a/examples/vision/detection/yolov7end2end_trt/python/infer.py +++ b/examples/vision/detection/yolov7end2end_trt/python/infer.py @@ -44,7 +44,7 @@ model = fd.vision.detection.YOLOv7End2EndTRT( # 预测图片检测结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 diff --git a/examples/vision/detection/yolox/python/infer.py b/examples/vision/detection/yolox/python/infer.py index 69203010b..c5e71754a 100644 --- a/examples/vision/detection/yolox/python/infer.py +++ b/examples/vision/detection/yolox/python/infer.py @@ -52,7 +52,7 @@ if args.image is None: else: image = args.image im = cv2.imread(image) -result = model.predict(im.copy()) +result = model.predict(im) 
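# --- Illustrative sketch, not part of the patch: the call pattern these
# --- Python hunks converge on. The model class, model file, and image path
# --- below are assumptions for demonstration only.
import cv2
import fastdeploy as fd

model = fd.vision.detection.YOLOv5("yolov5s.onnx")  # hypothetical model file
im = cv2.imread("test.jpg")  # hypothetical input image
result = model.predict(im)  # pass im directly; the defensive im.copy() is dropped
vis_im = fd.vision.vis_detection(im, result)  # the same im is reused for drawing
cv2.imwrite("vis_result.jpg", vis_im)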
print(result) # 预测结果可视化 vis_im = fd.vision.vis_detection(im, result) diff --git a/examples/vision/facealign/face_landmark_1000/python/infer.py b/examples/vision/facealign/face_landmark_1000/python/infer.py index bbb3a19c8..9eaedfb63 100644 --- a/examples/vision/facealign/face_landmark_1000/python/infer.py +++ b/examples/vision/facealign/face_landmark_1000/python/infer.py @@ -82,7 +82,7 @@ model = fd.vision.facealign.FaceLandmark1000( # for image im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 可视化结果 vis_im = fd.vision.vis_face_alignment(im, result) diff --git a/examples/vision/facealign/pfld/python/infer.py b/examples/vision/facealign/pfld/python/infer.py index 622fbf822..0a695e8c6 100755 --- a/examples/vision/facealign/pfld/python/infer.py +++ b/examples/vision/facealign/pfld/python/infer.py @@ -80,7 +80,7 @@ model = fd.vision.facealign.PFLD(args.model, runtime_option=runtime_option) # for image im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 可视化结果 vis_im = fd.vision.vis_face_alignment(im, result) diff --git a/examples/vision/facealign/pipnet/python/infer.py b/examples/vision/facealign/pipnet/python/infer.py index 628b86f79..46f9c2d90 100644 --- a/examples/vision/facealign/pipnet/python/infer.py +++ b/examples/vision/facealign/pipnet/python/infer.py @@ -85,7 +85,7 @@ model = fd.vision.facealign.PIPNet(args.model, runtime_option=runtime_option) model.num_landmarks = args.num_landmarks # for image im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 可视化结果 vis_im = fd.vision.vis_face_alignment(im, result) diff --git a/examples/vision/facedet/retinaface/python/infer.py b/examples/vision/facedet/retinaface/python/infer.py index 3b0152b1c..6b7f63e5e 100644 --- a/examples/vision/facedet/retinaface/python/infer.py +++ b/examples/vision/facedet/retinaface/python/infer.py @@ -43,7 +43,7 @@ model = fd.vision.facedet.RetinaFace(args.model, runtime_option=runtime_option) # 预测图片检测结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 vis_im = fd.vision.vis_face_detection(im, result) diff --git a/examples/vision/facedet/scrfd/python/infer.py b/examples/vision/facedet/scrfd/python/infer.py index a99e66385..af141e011 100644 --- a/examples/vision/facedet/scrfd/python/infer.py +++ b/examples/vision/facedet/scrfd/python/infer.py @@ -43,7 +43,7 @@ model = fd.vision.facedet.SCRFD(args.model, runtime_option=runtime_option) # 预测图片检测结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 vis_im = fd.vision.vis_face_detection(im, result) diff --git a/examples/vision/facedet/scrfd/rknpu2/python/infer.py b/examples/vision/facedet/scrfd/rknpu2/python/infer.py index 3b3fc9d83..77e57a0f1 100644 --- a/examples/vision/facedet/scrfd/rknpu2/python/infer.py +++ b/examples/vision/facedet/scrfd/rknpu2/python/infer.py @@ -49,7 +49,7 @@ model.disable_normalize_and_permute() # 预测图片分割结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 可视化结果 diff --git a/examples/vision/facedet/ultraface/python/infer.py b/examples/vision/facedet/ultraface/python/infer.py index 5399110b9..2087ce5bf 100644 --- a/examples/vision/facedet/ultraface/python/infer.py +++ b/examples/vision/facedet/ultraface/python/infer.py @@ -43,7 +43,7 @@ model = fd.vision.facedet.UltraFace(args.model, 
runtime_option=runtime_option) # 预测图片检测结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 vis_im = fd.vision.vis_face_detection(im, result) diff --git a/examples/vision/facedet/yolov5face/python/infer.py b/examples/vision/facedet/yolov5face/python/infer.py index a9f044682..ca907afb4 100644 --- a/examples/vision/facedet/yolov5face/python/infer.py +++ b/examples/vision/facedet/yolov5face/python/infer.py @@ -43,7 +43,7 @@ model = fd.vision.facedet.YOLOv5Face(args.model, runtime_option=runtime_option) # 预测图片检测结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 预测结果可视化 vis_im = fd.vision.vis_face_detection(im, result) diff --git a/examples/vision/headpose/fsanet/python/infer.py b/examples/vision/headpose/fsanet/python/infer.py index 866ce6d5c..488e35153 100644 --- a/examples/vision/headpose/fsanet/python/infer.py +++ b/examples/vision/headpose/fsanet/python/infer.py @@ -80,7 +80,7 @@ model = fd.vision.headpose.FSANet(args.model, runtime_option=runtime_option) # for image im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 可视化结果 vis_im = fd.vision.vis_headpose(im, result) diff --git a/examples/vision/matting/modnet/python/infer.py b/examples/vision/matting/modnet/python/infer.py index 408ba2340..abb9be037 100644 --- a/examples/vision/matting/modnet/python/infer.py +++ b/examples/vision/matting/modnet/python/infer.py @@ -52,7 +52,7 @@ model.size = (256, 256) # 预测图片抠图结果 im = cv2.imread(args.image) bg = cv2.imread(args.bg) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 可视化结果 vis_im = fd.vision.vis_matting_alpha(im, result) diff --git a/examples/vision/matting/ppmatting/python/infer.py b/examples/vision/matting/ppmatting/python/infer.py index 61031e1b6..32a81eca7 100644 --- a/examples/vision/matting/ppmatting/python/infer.py +++ b/examples/vision/matting/ppmatting/python/infer.py @@ -56,7 +56,7 @@ model = fd.vision.matting.PPMatting( # 预测图片抠图结果 im = cv2.imread(args.image) bg = cv2.imread(args.bg) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 可视化结果 vis_im = fd.vision.vis_matting(im, result) diff --git a/examples/vision/matting/rvm/python/infer.py b/examples/vision/matting/rvm/python/infer.py index 11951b00f..fcde64fb2 100755 --- a/examples/vision/matting/rvm/python/infer.py +++ b/examples/vision/matting/rvm/python/infer.py @@ -73,7 +73,7 @@ if args.video is not None: # for video cap = cv2.VideoCapture(args.video) # Define the codec and create VideoWriter object - fourcc = cv2.VideoWriter_fourcc(*'mp4v') + fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') composition = cv2.VideoWriter(output_composition, fourcc, 20.0, (1080, 1920)) alpha = cv2.VideoWriter(output_alpha, fourcc, 20.0, (1080, 1920)) @@ -100,7 +100,7 @@ if args.video is not None: if args.image is not None: # for image im = cv2.imread(args.image) - result = model.predict(im.copy()) + result = model.predict(im) print(result) # 可视化结果 vis_im = fd.vision.vis_matting(im, result) diff --git a/examples/vision/segmentation/paddleseg/python/infer.py b/examples/vision/segmentation/paddleseg/python/infer.py index 866e32bfb..9df7665a2 100644 --- a/examples/vision/segmentation/paddleseg/python/infer.py +++ b/examples/vision/segmentation/paddleseg/python/infer.py @@ -49,7 +49,7 @@ model = fd.vision.segmentation.PaddleSegModel( # 预测图片分割结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = 
model.predict(im) print(result) # 可视化结果 diff --git a/examples/vision/segmentation/paddleseg/quantize/python/infer.py b/examples/vision/segmentation/paddleseg/quantize/python/infer.py index f95f04d17..85a875c1e 100644 --- a/examples/vision/segmentation/paddleseg/quantize/python/infer.py +++ b/examples/vision/segmentation/paddleseg/quantize/python/infer.py @@ -72,5 +72,5 @@ model = fd.vision.segmentation.PaddleSegModel( # 预测图片检测结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) diff --git a/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py b/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py index 8841132a9..d7239eb42 100644 --- a/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py +++ b/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py @@ -53,7 +53,7 @@ model.disable_normalize_and_permute() # 预测图片分割结果 im = cv2.imread(args.image) -result = model.predict(im.copy()) +result = model.predict(im) print(result) # 可视化结果 From bfc9ac9ee1f0810fdbeb50761a96aa6a21b60285 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Mon, 12 Dec 2022 04:13:40 +0000 Subject: [PATCH 03/30] Update new Api, fastdeploy::vision::Visualize to fastdeploy::vision --- examples/vision/detection/nanodet_plus/cpp/infer.cc | 6 +++--- examples/vision/detection/scaledyolov4/cpp/infer.cc | 6 +++--- examples/vision/detection/yolor/cpp/infer.cc | 6 +++--- examples/vision/detection/yolov5/cpp/infer.cc | 6 +++--- examples/vision/detection/yolov5/quantize/cpp/infer.cc | 2 +- examples/vision/detection/yolov5lite/cpp/infer.cc | 6 +++--- examples/vision/detection/yolov6/cpp/infer.cc | 6 +++--- examples/vision/detection/yolov6/quantize/cpp/infer.cc | 2 +- examples/vision/detection/yolov7/cpp/infer.cc | 6 +++--- examples/vision/detection/yolov7/quantize/cpp/infer.cc | 2 +- examples/vision/detection/yolov7end2end_ort/cpp/infer.cc | 6 +++--- examples/vision/detection/yolov7end2end_trt/cpp/infer.cc | 6 +++--- examples/vision/detection/yolox/cpp/infer.cc | 6 +++--- examples/vision/facedet/retinaface/cpp/infer.cc | 6 +++--- examples/vision/facedet/scrfd/cpp/infer.cc | 6 +++--- examples/vision/facedet/scrfd/rknpu2/cpp/infer.cc | 2 +- examples/vision/facedet/ultraface/cpp/infer.cc | 6 +++--- examples/vision/facedet/yolov5face/cpp/infer.cc | 6 +++--- examples/vision/matting/modnet/cpp/infer.cc | 6 +++--- examples/vision/matting/ppmatting/cpp/infer.cc | 6 +++--- 20 files changed, 52 insertions(+), 52 deletions(-) diff --git a/examples/vision/detection/nanodet_plus/cpp/infer.cc b/examples/vision/detection/nanodet_plus/cpp/infer.cc index 8443639cc..8c9f3338f 100644 --- a/examples/vision/detection/nanodet_plus/cpp/infer.cc +++ b/examples/vision/detection/nanodet_plus/cpp/infer.cc @@ -30,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { return; } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -55,7 +55,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << 
std::endl; } @@ -82,7 +82,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/scaledyolov4/cpp/infer.cc b/examples/vision/detection/scaledyolov4/cpp/infer.cc index 7d912b223..040823a1d 100644 --- a/examples/vision/detection/scaledyolov4/cpp/infer.cc +++ b/examples/vision/detection/scaledyolov4/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -56,7 +56,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -83,7 +83,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolor/cpp/infer.cc b/examples/vision/detection/yolor/cpp/infer.cc index 0fe8913d4..105855a02 100644 --- a/examples/vision/detection/yolor/cpp/infer.cc +++ b/examples/vision/detection/yolor/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -55,7 +55,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -81,7 +81,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov5/cpp/infer.cc b/examples/vision/detection/yolov5/cpp/infer.cc index a7ac1fe81..dea29450b 100644 --- a/examples/vision/detection/yolov5/cpp/infer.cc +++ b/examples/vision/detection/yolov5/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& 
image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -55,7 +55,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -81,7 +81,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov5/quantize/cpp/infer.cc b/examples/vision/detection/yolov5/quantize/cpp/infer.cc index e429b2aad..690c19ddf 100644 --- a/examples/vision/detection/yolov5/quantize/cpp/infer.cc +++ b/examples/vision/detection/yolov5/quantize/cpp/infer.cc @@ -39,7 +39,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov5lite/cpp/infer.cc b/examples/vision/detection/yolov5lite/cpp/infer.cc index ac32bca93..0d9c58201 100644 --- a/examples/vision/detection/yolov5lite/cpp/infer.cc +++ b/examples/vision/detection/yolov5lite/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -56,7 +56,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -83,7 +83,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov6/cpp/infer.cc b/examples/vision/detection/yolov6/cpp/infer.cc index 72b2e7bed..fd00796f1 100644 --- a/examples/vision/detection/yolov6/cpp/infer.cc +++ b/examples/vision/detection/yolov6/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im 
= fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -55,7 +55,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -81,7 +81,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov6/quantize/cpp/infer.cc b/examples/vision/detection/yolov6/quantize/cpp/infer.cc index b40200962..57754ca30 100644 --- a/examples/vision/detection/yolov6/quantize/cpp/infer.cc +++ b/examples/vision/detection/yolov6/quantize/cpp/infer.cc @@ -39,7 +39,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov7/cpp/infer.cc b/examples/vision/detection/yolov7/cpp/infer.cc index cf79a16ad..803a7227a 100644 --- a/examples/vision/detection/yolov7/cpp/infer.cc +++ b/examples/vision/detection/yolov7/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -55,7 +55,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -81,7 +81,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov7/quantize/cpp/infer.cc b/examples/vision/detection/yolov7/quantize/cpp/infer.cc index 0eda80b6c..6033e3454 100644 --- a/examples/vision/detection/yolov7/quantize/cpp/infer.cc +++ b/examples/vision/detection/yolov7/quantize/cpp/infer.cc @@ -39,7 +39,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, std::cout << res.Str() << std::endl; - auto vis_im = 
fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov7end2end_ort/cpp/infer.cc b/examples/vision/detection/yolov7end2end_ort/cpp/infer.cc index a0e70544a..72ffe1a91 100644 --- a/examples/vision/detection/yolov7end2end_ort/cpp/infer.cc +++ b/examples/vision/detection/yolov7end2end_ort/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -56,7 +56,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -83,7 +83,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov7end2end_trt/cpp/infer.cc b/examples/vision/detection/yolov7end2end_trt/cpp/infer.cc index 1c7a17d37..20d963edf 100644 --- a/examples/vision/detection/yolov7end2end_trt/cpp/infer.cc +++ b/examples/vision/detection/yolov7end2end_trt/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -56,7 +56,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -83,7 +83,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolox/cpp/infer.cc b/examples/vision/detection/yolox/cpp/infer.cc index 2eeaccbf8..bed65f982 100644 --- a/examples/vision/detection/yolox/cpp/infer.cc +++ b/examples/vision/detection/yolox/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = 
fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -55,7 +55,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -81,7 +81,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/facedet/retinaface/cpp/infer.cc b/examples/vision/facedet/retinaface/cpp/infer.cc index a1fd27b6e..ceacce746 100644 --- a/examples/vision/facedet/retinaface/cpp/infer.cc +++ b/examples/vision/facedet/retinaface/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -55,7 +55,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -81,7 +81,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/facedet/scrfd/cpp/infer.cc b/examples/vision/facedet/scrfd/cpp/infer.cc index c804218ee..28823242a 100644 --- a/examples/vision/facedet/scrfd/cpp/infer.cc +++ b/examples/vision/facedet/scrfd/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -55,7 +55,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -81,7 +81,7 @@ void TrtInfer(const std::string& model_file, const std::string& 
image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/facedet/scrfd/rknpu2/cpp/infer.cc b/examples/vision/facedet/scrfd/rknpu2/cpp/infer.cc index a01f1b184..cd06004fe 100644 --- a/examples/vision/facedet/scrfd/rknpu2/cpp/infer.cc +++ b/examples/vision/facedet/scrfd/rknpu2/cpp/infer.cc @@ -73,7 +73,7 @@ void InferScrfd(const std::string& device) { (dur / CLOCKS_PER_SEC)); std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } \ No newline at end of file diff --git a/examples/vision/facedet/ultraface/cpp/infer.cc b/examples/vision/facedet/ultraface/cpp/infer.cc index b45bb3b0b..6327c63d6 100644 --- a/examples/vision/facedet/ultraface/cpp/infer.cc +++ b/examples/vision/facedet/ultraface/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -55,7 +55,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -81,7 +81,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/facedet/yolov5face/cpp/infer.cc b/examples/vision/facedet/yolov5face/cpp/infer.cc index 418834e1e..58e15c8ef 100644 --- a/examples/vision/facedet/yolov5face/cpp/infer.cc +++ b/examples/vision/facedet/yolov5face/cpp/infer.cc @@ -31,7 +31,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -55,7 +55,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -81,7 +81,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } 
std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/matting/modnet/cpp/infer.cc b/examples/vision/matting/modnet/cpp/infer.cc index fe1ebc910..d11c9717f 100644 --- a/examples/vision/matting/modnet/cpp/infer.cc +++ b/examples/vision/matting/modnet/cpp/infer.cc @@ -34,7 +34,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file, auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); auto vis_im_with_bg = - fastdeploy::vision::Visualize::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " @@ -65,7 +65,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file, auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); auto vis_im_with_bg = - fastdeploy::vision::Visualize::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " @@ -97,7 +97,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file, auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); auto vis_im_with_bg = - fastdeploy::vision::Visualize::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " diff --git a/examples/vision/matting/ppmatting/cpp/infer.cc b/examples/vision/matting/ppmatting/cpp/infer.cc index 304e4239a..9c917a77f 100644 --- a/examples/vision/matting/ppmatting/cpp/infer.cc +++ b/examples/vision/matting/ppmatting/cpp/infer.cc @@ -44,7 +44,7 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file, } auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); auto vis_im_with_bg = - fastdeploy::vision::Visualize::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " @@ -78,7 +78,7 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file, } auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); auto vis_im_with_bg = - fastdeploy::vision::Visualize::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " @@ -113,7 +113,7 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file, } auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); auto vis_im_with_bg = - fastdeploy::vision::Visualize::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); 
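// --- Illustrative sketch, not part of the patch: the free-function helpers
// --- this series migrates to, replacing the class-scoped
// --- fastdeploy::vision::Visualize::* spellings. Function and variable names
// --- here are assumptions. (PATCH 04's subject indicates SwapBackgroundMatting
// --- is itself later renamed to SwapBackground.)
#include <opencv2/opencv.hpp>
#include "fastdeploy/vision.h"

void VisualizeMattingSketch(cv::Mat im, cv::Mat bg,
                            const fastdeploy::vision::MattingResult& res) {
  auto vis_im = fastdeploy::vision::VisMatting(im, res);
  auto vis_im_with_bg = fastdeploy::vision::SwapBackgroundMatting(im, bg, res);
  cv::imwrite("visualized_result_fg.jpg", vis_im);
  cv::imwrite("visualized_result.jpg", vis_im_with_bg);
}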
cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " From 37359c65f3af287f97462ee8853786304fb52496 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Mon, 12 Dec 2022 10:59:15 +0000 Subject: [PATCH 04/30] Update SwapBackgroundSegmentation && SwapBackgroundMatting to SwapBackground --- .../vision/detection/nanodet_plus/cpp/infer.cc | 9 +++------ .../vision/detection/scaledyolov4/cpp/infer.cc | 9 +++------ examples/vision/detection/yolor/cpp/infer.cc | 9 +++------ examples/vision/detection/yolov5/cpp/infer.cc | 9 +++------ .../vision/detection/yolov5/quantize/cpp/infer.cc | 3 +-- examples/vision/detection/yolov5lite/cpp/infer.cc | 9 +++------ examples/vision/detection/yolov6/cpp/infer.cc | 9 +++------ .../vision/detection/yolov6/quantize/cpp/infer.cc | 3 +-- examples/vision/detection/yolov7/cpp/infer.cc | 9 +++------ .../vision/detection/yolov7/quantize/cpp/infer.cc | 3 +-- .../detection/yolov7end2end_ort/cpp/infer.cc | 9 +++------ .../detection/yolov7end2end_trt/cpp/infer.cc | 11 ++++------- examples/vision/detection/yolox/cpp/infer.cc | 9 +++------ examples/vision/facedet/retinaface/cpp/infer.cc | 9 +++------ examples/vision/facedet/scrfd/cpp/infer.cc | 9 +++------ examples/vision/facedet/ultraface/cpp/infer.cc | 9 +++------ examples/vision/facedet/yolov5face/cpp/infer.cc | 9 +++------ examples/vision/matting/modnet/cpp/infer.cc | 15 ++++++--------- examples/vision/matting/modnet/python/infer.py | 2 +- examples/vision/matting/ppmatting/cpp/infer.cc | 15 ++++++--------- examples/vision/matting/ppmatting/python/infer.py | 2 +- examples/vision/matting/rvm/python/infer.py | 4 ++-- 22 files changed, 62 insertions(+), 113 deletions(-) diff --git a/examples/vision/detection/nanodet_plus/cpp/infer.cc b/examples/vision/detection/nanodet_plus/cpp/infer.cc index 8c9f3338f..ec0bbee98 100644 --- a/examples/vision/detection/nanodet_plus/cpp/infer.cc +++ b/examples/vision/detection/nanodet_plus/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -30,7 +29,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { return; } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -46,7 +45,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -55,7 +53,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -73,7 +71,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -82,7 +79,7 @@ void 
TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/scaledyolov4/cpp/infer.cc b/examples/vision/detection/scaledyolov4/cpp/infer.cc index 040823a1d..683156cc9 100644 --- a/examples/vision/detection/scaledyolov4/cpp/infer.cc +++ b/examples/vision/detection/scaledyolov4/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -47,7 +46,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -56,7 +54,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -74,7 +72,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -83,7 +80,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolor/cpp/infer.cc b/examples/vision/detection/yolor/cpp/infer.cc index 105855a02..bacc18b40 100644 --- a/examples/vision/detection/yolor/cpp/infer.cc +++ b/examples/vision/detection/yolor/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -46,7 +45,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -55,7 +53,7 @@ void GpuInfer(const 
std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -72,7 +70,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -81,7 +78,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov5/cpp/infer.cc b/examples/vision/detection/yolov5/cpp/infer.cc index dea29450b..1c3907918 100644 --- a/examples/vision/detection/yolov5/cpp/infer.cc +++ b/examples/vision/detection/yolov5/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -46,7 +45,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -55,7 +53,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -72,7 +70,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -81,7 +78,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov5/quantize/cpp/infer.cc b/examples/vision/detection/yolov5/quantize/cpp/infer.cc index 690c19ddf..1addf1507 100644 --- a/examples/vision/detection/yolov5/quantize/cpp/infer.cc +++ b/examples/vision/detection/yolov5/quantize/cpp/infer.cc @@ -29,7 +29,6 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, assert(model.Initialized()); auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -39,7 
+38,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov5lite/cpp/infer.cc b/examples/vision/detection/yolov5lite/cpp/infer.cc index 0d9c58201..0a3f7b81b 100644 --- a/examples/vision/detection/yolov5lite/cpp/infer.cc +++ b/examples/vision/detection/yolov5lite/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -47,7 +46,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -56,7 +54,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -74,7 +72,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -83,7 +80,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov6/cpp/infer.cc b/examples/vision/detection/yolov6/cpp/infer.cc index fd00796f1..dbca64f40 100644 --- a/examples/vision/detection/yolov6/cpp/infer.cc +++ b/examples/vision/detection/yolov6/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -46,7 +45,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -55,7 +53,7 @@ void 
GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -72,7 +70,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -81,7 +78,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov6/quantize/cpp/infer.cc b/examples/vision/detection/yolov6/quantize/cpp/infer.cc index 57754ca30..adda977f3 100644 --- a/examples/vision/detection/yolov6/quantize/cpp/infer.cc +++ b/examples/vision/detection/yolov6/quantize/cpp/infer.cc @@ -29,7 +29,6 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, assert(model.Initialized()); auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -39,7 +38,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov7/cpp/infer.cc b/examples/vision/detection/yolov7/cpp/infer.cc index 803a7227a..5fd848b8e 100644 --- a/examples/vision/detection/yolov7/cpp/infer.cc +++ b/examples/vision/detection/yolov7/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -46,7 +45,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -55,7 +53,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -72,7 +70,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { 
@@ -81,7 +78,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov7/quantize/cpp/infer.cc b/examples/vision/detection/yolov7/quantize/cpp/infer.cc index 6033e3454..9ae42b582 100644 --- a/examples/vision/detection/yolov7/quantize/cpp/infer.cc +++ b/examples/vision/detection/yolov7/quantize/cpp/infer.cc @@ -29,7 +29,6 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, assert(model.Initialized()); auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -39,7 +38,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file, std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov7end2end_ort/cpp/infer.cc b/examples/vision/detection/yolov7end2end_ort/cpp/infer.cc index 72ffe1a91..5c9df2816 100644 --- a/examples/vision/detection/yolov7end2end_ort/cpp/infer.cc +++ b/examples/vision/detection/yolov7end2end_ort/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -47,7 +46,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -56,7 +54,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -74,7 +72,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -83,7 +80,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/detection/yolov7end2end_trt/cpp/infer.cc b/examples/vision/detection/yolov7end2end_trt/cpp/infer.cc index 20d963edf..4f7a2f791 100644 --- 
a/examples/vision/detection/yolov7end2end_trt/cpp/infer.cc +++ b/examples/vision/detection/yolov7end2end_trt/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -47,7 +46,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -56,7 +54,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -74,7 +72,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -83,7 +80,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -107,4 +104,4 @@ int main(int argc, char* argv[]) { TrtInfer(argv[1], argv[2]); } return 0; -} \ No newline at end of file +} diff --git a/examples/vision/detection/yolox/cpp/infer.cc b/examples/vision/detection/yolox/cpp/infer.cc index bed65f982..836ab7e63 100644 --- a/examples/vision/detection/yolox/cpp/infer.cc +++ b/examples/vision/detection/yolox/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -46,7 +45,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -55,7 +53,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -72,7 +70,6 @@ void TrtInfer(const 
std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::DetectionResult res; if (!model.Predict(&im, &res)) { @@ -81,7 +78,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/facedet/retinaface/cpp/infer.cc b/examples/vision/facedet/retinaface/cpp/infer.cc index ceacce746..f125e4ba7 100644 --- a/examples/vision/facedet/retinaface/cpp/infer.cc +++ b/examples/vision/facedet/retinaface/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -46,7 +45,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -55,7 +53,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -72,7 +70,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -81,7 +78,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/facedet/scrfd/cpp/infer.cc b/examples/vision/facedet/scrfd/cpp/infer.cc index 28823242a..c09403741 100644 --- a/examples/vision/facedet/scrfd/cpp/infer.cc +++ b/examples/vision/facedet/scrfd/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -46,7 +45,6 @@ void 
GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -55,7 +53,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -72,7 +70,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -81,7 +78,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/facedet/ultraface/cpp/infer.cc b/examples/vision/facedet/ultraface/cpp/infer.cc index 6327c63d6..b50674e1d 100644 --- a/examples/vision/facedet/ultraface/cpp/infer.cc +++ b/examples/vision/facedet/ultraface/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -46,7 +45,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -55,7 +53,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -72,7 +70,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -81,7 +78,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/facedet/yolov5face/cpp/infer.cc b/examples/vision/facedet/yolov5face/cpp/infer.cc index 58e15c8ef..ecdcfcd0c 100644 --- a/examples/vision/facedet/yolov5face/cpp/infer.cc +++ 
b/examples/vision/facedet/yolov5face/cpp/infer.cc @@ -22,7 +22,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -31,7 +30,7 @@ void CpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -46,7 +45,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -55,7 +53,7 @@ void GpuInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } @@ -72,7 +70,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } auto im = cv::imread(image_file); - auto im_bak = im.clone(); fastdeploy::vision::FaceDetectionResult res; if (!model.Predict(&im, &res)) { @@ -81,7 +78,7 @@ void TrtInfer(const std::string& model_file, const std::string& image_file) { } std::cout << res.Str() << std::endl; - auto vis_im = fastdeploy::vision::VisFaceDetection(im_bak, res); + auto vis_im = fastdeploy::vision::VisFaceDetection(im, res); cv::imwrite("vis_result.jpg", vis_im); std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; } diff --git a/examples/vision/matting/modnet/cpp/infer.cc b/examples/vision/matting/modnet/cpp/infer.cc index d11c9717f..aa280eddd 100644 --- a/examples/vision/matting/modnet/cpp/infer.cc +++ b/examples/vision/matting/modnet/cpp/infer.cc @@ -23,7 +23,6 @@ void CpuInfer(const std::string& model_file, const std::string& image_file, } model.size = {256, 256}; auto im = cv::imread(image_file); - auto im_bak = im.clone(); cv::Mat bg = cv::imread(background_file); fastdeploy::vision::MattingResult res; @@ -32,9 +31,9 @@ void CpuInfer(const std::string& model_file, const std::string& image_file, return; } - auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); + auto vis_im = fastdeploy::vision::VisMatting(im, res); auto vis_im_with_bg = - fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " @@ -54,7 +53,6 @@ void GpuInfer(const std::string& model_file, const std::string& image_file, model.size = {256, 256}; auto im = cv::imread(image_file); - auto im_bak = im.clone(); cv::Mat bg = cv::imread(background_file); fastdeploy::vision::MattingResult res; @@ -63,9 +61,9 @@ void GpuInfer(const std::string& model_file, const std::string& image_file, return; } - auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); + auto vis_im = fastdeploy::vision::VisMatting(im, res); auto vis_im_with_bg = - fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackground(im, bg, 
res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " @@ -86,7 +84,6 @@ void TrtInfer(const std::string& model_file, const std::string& image_file, } model.size = {256, 256}; auto im = cv::imread(image_file); - auto im_bak = im.clone(); cv::Mat bg = cv::imread(background_file); fastdeploy::vision::MattingResult res; @@ -95,9 +92,9 @@ void TrtInfer(const std::string& model_file, const std::string& image_file, return; } - auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); + auto vis_im = fastdeploy::vision::VisMatting(im, res); auto vis_im_with_bg = - fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " diff --git a/examples/vision/matting/modnet/python/infer.py b/examples/vision/matting/modnet/python/infer.py index abb9be037..37c749010 100644 --- a/examples/vision/matting/modnet/python/infer.py +++ b/examples/vision/matting/modnet/python/infer.py @@ -56,7 +56,7 @@ result = model.predict(im) print(result) # 可视化结果 vis_im = fd.vision.vis_matting_alpha(im, result) -vis_im_with_bg = fd.vision.swap_background_matting(im, bg, result) +vis_im_with_bg = fd.vision.swap_background(im, bg, result) cv2.imwrite("visualized_result_fg.jpg", vis_im) cv2.imwrite("visualized_result_replaced_bg.jpg", vis_im_with_bg) print( diff --git a/examples/vision/matting/ppmatting/cpp/infer.cc b/examples/vision/matting/ppmatting/cpp/infer.cc index 9c917a77f..2acb2a8ca 100644 --- a/examples/vision/matting/ppmatting/cpp/infer.cc +++ b/examples/vision/matting/ppmatting/cpp/infer.cc @@ -35,16 +35,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file, } auto im = cv::imread(image_file); - auto im_bak = im.clone(); cv::Mat bg = cv::imread(background_file); fastdeploy::vision::MattingResult res; if (!model.Predict(&im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } - auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); + auto vis_im = fastdeploy::vision::VisMatting(im, res); auto vis_im_with_bg = - fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " @@ -69,16 +68,15 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file, } auto im = cv::imread(image_file); - auto im_bak = im.clone(); cv::Mat bg = cv::imread(background_file); fastdeploy::vision::MattingResult res; if (!model.Predict(&im, &res)) { std::cerr << "Failed to predict." 
<< std::endl; return; } - auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); + auto vis_im = fastdeploy::vision::VisMatting(im, res); auto vis_im_with_bg = - fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " @@ -104,16 +102,15 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file, } auto im = cv::imread(image_file); - auto im_bak = im.clone(); cv::Mat bg = cv::imread(background_file); fastdeploy::vision::MattingResult res; if (!model.Predict(&im, &res)) { std::cerr << "Failed to predict." << std::endl; return; } - auto vis_im = fastdeploy::vision::VisMatting(im_bak, res); + auto vis_im = fastdeploy::vision::VisMatting(im, res); auto vis_im_with_bg = - fastdeploy::vision::SwapBackgroundMatting(im_bak, bg, res); + fastdeploy::vision::SwapBackground(im, bg, res); cv::imwrite("visualized_result.jpg", vis_im_with_bg); cv::imwrite("visualized_result_fg.jpg", vis_im); std::cout << "Visualized result save in ./visualized_result_replaced_bg.jpg " diff --git a/examples/vision/matting/ppmatting/python/infer.py b/examples/vision/matting/ppmatting/python/infer.py index 32a81eca7..89913cd11 100644 --- a/examples/vision/matting/ppmatting/python/infer.py +++ b/examples/vision/matting/ppmatting/python/infer.py @@ -60,7 +60,7 @@ result = model.predict(im) print(result) # 可视化结果 vis_im = fd.vision.vis_matting(im, result) -vis_im_with_bg = fd.vision.swap_background_matting(im, bg, result) +vis_im_with_bg = fd.vision.swap_background(im, bg, result) cv2.imwrite("visualized_result_fg.jpg", vis_im) cv2.imwrite("visualized_result_replaced_bg.jpg", vis_im_with_bg) print( diff --git a/examples/vision/matting/rvm/python/infer.py b/examples/vision/matting/rvm/python/infer.py index fcde64fb2..0e9eb6b21 100755 --- a/examples/vision/matting/rvm/python/infer.py +++ b/examples/vision/matting/rvm/python/infer.py @@ -86,7 +86,7 @@ if args.video is not None: break result = model.predict(frame) vis_im = fd.vision.vis_matting(frame, result) - vis_im_with_bg = fd.vision.swap_background_matting(frame, bg, result) + vis_im_with_bg = fd.vision.swap_background(frame, bg, result) alpha.write(vis_im) composition.write(vis_im_with_bg) cv2.waitKey(30) @@ -104,7 +104,7 @@ if args.image is not None: print(result) # 可视化结果 vis_im = fd.vision.vis_matting(im, result) - vis_im_with_bg = fd.vision.swap_background_matting(im, bg, result) + vis_im_with_bg = fd.vision.swap_background(im, bg, result) cv2.imwrite("visualized_result_fg.jpg", vis_im) cv2.imwrite("visualized_result_replaced_bg.jpg", vis_im_with_bg) print( From 37590ad2dc00abed74f043ce848a3dd179cb451a Mon Sep 17 00:00:00 2001 From: huangjianhui <852142024@qq.com> Date: Mon, 12 Dec 2022 21:29:57 +0800 Subject: [PATCH 05/30] Update README_CN.md --- README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_CN.md b/README_CN.md index 8814215fe..d296e37b3 100755 --- a/README_CN.md +++ b/README_CN.md @@ -118,7 +118,7 @@ model = vision.detection.PPYOLOE("ppyoloe_crn_l_300e_coco/model.pdmodel", "ppyoloe_crn_l_300e_coco/model.pdiparams", "ppyoloe_crn_l_300e_coco/infer_cfg.yml") im = cv2.imread("000000014439.jpg") -result = model.predict(im.copy()) +result = model.predict(im) print(result) vis_im = vision.vis_detection(im, result, score_threshold=0.5) From 8d038e430856110fa666736fd456a6fda0c40464 Mon 
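
The example diffs above all make the same two changes: the defensive `im_bak = im.clone()` is deleted, and the visualization helper receives `im` directly. That only works if `Predict`/`predict` leaves the input image untouched, which is the behavioral guarantee this part of the series relies on. A minimal Python sketch of the resulting usage pattern (the model directory and image filename here are placeholders, not files shipped with these patches):

```python
import cv2
import fastdeploy as fd

# Hypothetical exported PP-YOLOE model directory and local test image.
model = fd.vision.detection.PPYOLOE("ppyoloe/model.pdmodel",
                                    "ppyoloe/model.pdiparams",
                                    "ppyoloe/infer_cfg.yml")

im = cv2.imread("test.jpg")
result = model.predict(im)  # no im.copy() needed; predict() leaves im untouched
vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
cv2.imwrite("vis_result.jpg", vis_im)
```
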
Sep 17 00:00:00 2001 From: huangjianhui <852142024@qq.com> Date: Mon, 12 Dec 2022 21:36:15 +0800 Subject: [PATCH 06/30] Update README_CN.md --- README_CN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_CN.md b/README_CN.md index d296e37b3..30dc326f8 100755 --- a/README_CN.md +++ b/README_CN.md @@ -165,7 +165,7 @@ int main(int argc, char* argv[]) { vision::DetectionResult res; model.Predict(&im, &res); - auto vis_im = vision::Visualize::VisDetection(im, res, 0.5); + auto vis_im = vision::VisDetection(im, res, 0.5); cv::imwrite("vis_image.jpg", vis_im); return 0; } From b016b762cf3f9bcd6451c54cdfe16f569feed867 Mon Sep 17 00:00:00 2001 From: huangjianhui <852142024@qq.com> Date: Wed, 18 Jan 2023 16:40:07 +0800 Subject: [PATCH 07/30] Update preprocessor.h --- fastdeploy/vision/segmentation/ppseg/preprocessor.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fastdeploy/vision/segmentation/ppseg/preprocessor.h b/fastdeploy/vision/segmentation/ppseg/preprocessor.h index 6452e8e0e..1b27863e4 100644 --- a/fastdeploy/vision/segmentation/ppseg/preprocessor.h +++ b/fastdeploy/vision/segmentation/ppseg/preprocessor.h @@ -31,7 +31,8 @@ class FASTDEPLOY_DECL PaddleSegPreprocessor { /** \brief Process the input image and prepare input tensors for runtime * * \param[in] images The input image data list, all the elements are returned by cv::imread() - * \param[in] outputs The output tensors which will feed in runtime, include image + * \param[in] outputs The output tensors which will feed in runtime + * \param[in] imgs_info The original input images shape info map, key is "shape_info", value is vector<array<int, 2>> a{{height, width}} * \return true if the preprocess successed, otherwise false */ virtual bool Run( From 03cdc4cde36056f48bcd1b9bbd41e4ba3e6b07fa Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 18 Jan 2023 12:20:05 +0000 Subject: [PATCH 08/30] PaddleSeg supports triton serving --- .../serving/models/paddleseg/1/README.md | 3 + .../serving/models/paddleseg/config.pbtxt | 67 ++++++++++ .../serving/models/postprocess/1/model.py | 115 +++++++++++++++++ .../serving/models/postprocess/config.pbtxt | 30 +++++ .../serving/models/preprocess/1/deploy.yaml | 12 ++ .../serving/models/preprocess/1/model.py | 117 ++++++++++++++++++ .../serving/models/preprocess/config.pbtxt | 34 +++++ .../serving/models/runtime/1/README.md | 5 + .../serving/models/runtime/config.pbtxt | 60 +++++++++ 9 files changed, 443 insertions(+) create mode 100644 examples/vision/segmentation/paddleseg/serving/models/paddleseg/1/README.md create mode 100644 examples/vision/segmentation/paddleseg/serving/models/paddleseg/config.pbtxt create mode 100755 examples/vision/segmentation/paddleseg/serving/models/postprocess/1/model.py create mode 100644 examples/vision/segmentation/paddleseg/serving/models/postprocess/config.pbtxt create mode 100644 examples/vision/segmentation/paddleseg/serving/models/preprocess/1/deploy.yaml create mode 100644 examples/vision/segmentation/paddleseg/serving/models/preprocess/1/model.py create mode 100644 examples/vision/segmentation/paddleseg/serving/models/preprocess/config.pbtxt create mode 100644 examples/vision/segmentation/paddleseg/serving/models/runtime/1/README.md create mode 100644 examples/vision/segmentation/paddleseg/serving/models/runtime/config.pbtxt diff --git a/examples/vision/segmentation/paddleseg/serving/models/paddleseg/1/README.md b/examples/vision/segmentation/paddleseg/serving/models/paddleseg/1/README.md new file mode 100644 index
000000000..42ae7e483 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/models/paddleseg/1/README.md @@ -0,0 +1,3 @@ +# PaddleSeg Pipeline + +The pipeline directory does not have model files, but a version number directory needs to be maintained. diff --git a/examples/vision/segmentation/paddleseg/serving/models/paddleseg/config.pbtxt b/examples/vision/segmentation/paddleseg/serving/models/paddleseg/config.pbtxt new file mode 100644 index 000000000..9571a5b91 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/models/paddleseg/config.pbtxt @@ -0,0 +1,67 @@ +platform: "ensemble" + +input [ + { + name: "INPUT" + data_type: TYPE_UINT8 + dims: [-1, -1, -1, 3 ] + } +] + +output [ + { + name: "SEG_RESULT" + data_type: TYPE_STRING + dims: [ -1 ] + } +] + +ensemble_scheduling { + step [ + { + model_name: "preprocess" + model_version: 1 + input_map { + key: "preprocess_input" + value: "INPUT" + } + output_map { + key: "preprocess_output_1" + value: "RUNTIME_INPUT_1" + } + output_map { + key: "preprocess_output_2" + value: "POSTPROCESS_INPUT_2" + } + }, + { + model_name: "runtime" + model_version: 1 + input_map { + key: "x" + value: "RUNTIME_INPUT_1" + } + output_map { + key: "argmax_0.tmp_0" + value: "RUNTIME_OUTPUT" + } + }, + { + model_name: "postprocess" + model_version: 1 + input_map { + key: "post_input_1" + value: "RUNTIME_OUTPUT" + } + input_map { + key: "post_input_2" + value: "POSTPROCESS_INPUT_2" + } + output_map { + key: "post_output" + value: "SEG_RESULT" + } + } + ] +} + diff --git a/examples/vision/segmentation/paddleseg/serving/models/postprocess/1/model.py b/examples/vision/segmentation/paddleseg/serving/models/postprocess/1/model.py new file mode 100755 index 000000000..510aad6ea --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/models/postprocess/1/model.py @@ -0,0 +1,115 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import numpy as np +import time +import os +import fastdeploy as fd + +# triton_python_backend_utils is available in every Triton Python model. You +# need to use this module to create inference requests and responses. It also +# contains some utility functions for extracting information from model_config +# and converting Triton input/output types to numpy types. +import triton_python_backend_utils as pb_utils + + +class TritonPythonModel: + """Your Python model must use the same class name. Every Python model + that is created must have "TritonPythonModel" as the class name. + """ + + def initialize(self, args): + """`initialize` is called only once when the model is being loaded. + Implementing `initialize` function is optional. This function allows + the model to intialize any state associated with this model. + Parameters + ---------- + args : dict + Both keys and values are strings. 
The dictionary keys and values are: + * model_config: A JSON string containing the model configuration + * model_instance_kind: A string containing model instance kind + * model_instance_device_id: A string containing model instance device ID + * model_repository: Model repository path + * model_version: Model version + * model_name: Model name + """ + # You must parse model_config. JSON string is not parsed here + self.model_config = json.loads(args['model_config']) + print("model_config:", self.model_config) + + self.input_names = [] + for input_config in self.model_config["input"]: + self.input_names.append(input_config["name"]) + print("postprocess input names:", self.input_names) + + self.output_names = [] + self.output_dtype = [] + for output_config in self.model_config["output"]: + self.output_names.append(output_config["name"]) + dtype = pb_utils.triton_string_to_numpy(output_config["data_type"]) + self.output_dtype.append(dtype) + print("postprocess output names:", self.output_names) + + yaml_path = os.path.abspath(os.path.dirname(__file__)) + "/deploy.yaml" + self.postprocess_ = fd.vision.segmentation.PaddleSegPostprocessor( + yaml_path) + + def execute(self, requests): + """`execute` must be implemented in every Python model. `execute` + function receives a list of pb_utils.InferenceRequest as the only + argument. This function is called when an inference is requested + for this model. Depending on the batching configuration (e.g. Dynamic + Batching) used, `requests` may contain multiple requests. Every + Python model, must create one pb_utils.InferenceResponse for every + pb_utils.InferenceRequest in `requests`. If there is an error, you can + set the error argument when creating a pb_utils.InferenceResponse. + Parameters + ---------- + requests : list + A list of pb_utils.InferenceRequest + Returns + ------- + list + A list of pb_utils.InferenceResponse. The length of this list must + be the same as `requests` + """ + responses = [] + for request in requests: + infer_outputs = pb_utils.get_input_tensor_by_name( + request, self.input_names[0]) + im_info = pb_utils.get_input_tensor_by_name(request, + self.input_names[1]) + infer_outputs = infer_outputs.as_numpy() + im_info = im_info.as_numpy() + for i in range(im_info.shape[0]): + im_info[i] = json.loads(im_info[i].decode('utf-8').replace( + "'", '"')) + + results = self.postprocess_.run([infer_outputs], im_info[0]) + r_str = fd.vision.utils.fd_result_to_json(results) + + r_np = np.array(r_str, dtype=np.object_) + out_tensor = pb_utils.Tensor(self.output_names[0], r_np) + inference_response = pb_utils.InferenceResponse( + output_tensors=[out_tensor, ]) + responses.append(inference_response) + return responses + + def finalize(self): + """`finalize` is called only once when the model is being unloaded. + Implementing `finalize` function is optional. This function allows + the model to perform any necessary clean ups before exit. 
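
For reference, the `SEG_RESULT` tensor produced by this postprocess model carries one JSON string per image, serialized with `fd.vision.utils.fd_result_to_json`. A hedged sketch of turning one element back into a result object on the client side; the field names are taken from the gRPC client added later in this series:

```python
import json
import fastdeploy as fd

def decode_seg_result(raw):
    """Rebuild a FastDeploy SegmentationResult from one SEG_RESULT element."""
    value = json.loads(raw)  # raw is the JSON string for a single image
    result = fd.C.vision.SegmentationResult()
    result.label_map = value["label_map"]
    result.shape = value["shape"]
    return result
```
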
+ """ + print('Cleaning up...') diff --git a/examples/vision/segmentation/paddleseg/serving/models/postprocess/config.pbtxt b/examples/vision/segmentation/paddleseg/serving/models/postprocess/config.pbtxt new file mode 100644 index 000000000..81f31ba08 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/models/postprocess/config.pbtxt @@ -0,0 +1,30 @@ +name: "postprocess" +backend: "python" + +input [ + { + name: "post_input_1" + data_type: TYPE_INT32 + dims: [-1, -1, -1] + }, + { + name: "post_input_2" + data_type: TYPE_STRING + dims: [ -1 ] + } +] + +output [ + { + name: "post_output" + data_type: TYPE_STRING + dims: [ -1 ] + } +] + +instance_group [ + { + count: 1 + kind: KIND_CPU + } +] diff --git a/examples/vision/segmentation/paddleseg/serving/models/preprocess/1/deploy.yaml b/examples/vision/segmentation/paddleseg/serving/models/preprocess/1/deploy.yaml new file mode 100644 index 000000000..6d33e5009 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/models/preprocess/1/deploy.yaml @@ -0,0 +1,12 @@ +Deploy: + input_shape: + - -1 + - 3 + - -1 + - -1 + model: model.pdmodel + output_dtype: int32 + output_op: argmax + params: model.pdiparams + transforms: + - type: Normalize diff --git a/examples/vision/segmentation/paddleseg/serving/models/preprocess/1/model.py b/examples/vision/segmentation/paddleseg/serving/models/preprocess/1/model.py new file mode 100644 index 000000000..48a72d6fa --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/models/preprocess/1/model.py @@ -0,0 +1,117 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import numpy as np +import os + +import fastdeploy as fd + +# triton_python_backend_utils is available in every Triton Python model. You +# need to use this module to create inference requests and responses. It also +# contains some utility functions for extracting information from model_config +# and converting Triton input/output types to numpy types. +import triton_python_backend_utils as pb_utils + + +class TritonPythonModel: + """Your Python model must use the same class name. Every Python model + that is created must have "TritonPythonModel" as the class name. + """ + + def initialize(self, args): + """`initialize` is called only once when the model is being loaded. + Implementing `initialize` function is optional. This function allows + the model to intialize any state associated with this model. + Parameters + ---------- + args : dict + Both keys and values are strings. The dictionary keys and values are: + * model_config: A JSON string containing the model configuration + * model_instance_kind: A string containing model instance kind + * model_instance_device_id: A string containing model instance device ID + * model_repository: Model repository path + * model_version: Model version + * model_name: Model name + """ + # You must parse model_config. 
JSON string is not parsed here + self.model_config = json.loads(args['model_config']) + print("model_config:", self.model_config) + + self.input_names = [] + for input_config in self.model_config["input"]: + self.input_names.append(input_config["name"]) + print("preprocess input names:", self.input_names) + + self.output_names = [] + self.output_dtype = [] + for output_config in self.model_config["output"]: + self.output_names.append(output_config["name"]) + # dtype = pb_utils.triton_string_to_numpy(output_config["data_type"]) + # self.output_dtype.append(dtype) + self.output_dtype.append(output_config["data_type"]) + print("preprocess output names:", self.output_names) + + # init PaddleSegPreprocess class + yaml_path = os.path.abspath(os.path.dirname(__file__)) + "/deploy.yaml" + self.preprocess_ = fd.vision.segmentation.PaddleSegPreprocessor( + yaml_path) + #if args['model_instance_kind'] == 'GPU': + # device_id = int(args['model_instance_device_id']) + # self.preprocess_.use_gpu(device_id) + + def execute(self, requests): + """`execute` must be implemented in every Python model. `execute` + function receives a list of pb_utils.InferenceRequest as the only + argument. This function is called when an inference is requested + for this model. Depending on the batching configuration (e.g. Dynamic + Batching) used, `requests` may contain multiple requests. Every + Python model, must create one pb_utils.InferenceResponse for every + pb_utils.InferenceRequest in `requests`. If there is an error, you can + set the error argument when creating a pb_utils.InferenceResponse. + Parameters + ---------- + requests : list + A list of pb_utils.InferenceRequest + Returns + ------- + list + A list of pb_utils.InferenceResponse. The length of this list must + be the same as `requests` + """ + responses = [] + for request in requests: + data = pb_utils.get_input_tensor_by_name(request, + self.input_names[0]) + data = data.as_numpy() + outputs, im_info = self.preprocess_.run(data) + + # PaddleSeg preprocess has two outputs + dlpack_tensor = outputs[0].to_dlpack() + output_tensor_0 = pb_utils.Tensor.from_dlpack(self.output_names[0], + dlpack_tensor) + output_tensor_1 = pb_utils.Tensor( + self.output_names[1], np.array( + [im_info], dtype=np.object_)) + inference_response = pb_utils.InferenceResponse( + output_tensors=[output_tensor_0, output_tensor_1]) + responses.append(inference_response) + return responses + + def finalize(self): + """`finalize` is called only once when the model is being unloaded. + Implementing `finalize` function is optional. This function allows + the model to perform any necessary clean ups before exit. 
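
For orientation, the preprocess model above wraps FastDeploy calls that can also be exercised locally. A hedged sketch, assuming a copy of the served model's `deploy.yaml` and a test image are on hand; the runtime hand-off is only indicated in comments:

```python
import cv2
import fastdeploy as fd

# PaddleSegPreprocessor returns the runtime input tensors plus the per-image
# shape info that the postprocess step consumes later.
preprocessor = fd.vision.segmentation.PaddleSegPreprocessor("deploy.yaml")

im = cv2.imread("cityscapes_demo.png")
input_tensors, imgs_info = preprocessor.run([im])
print(imgs_info)  # expected to carry {"shape_info": [[height, width]]}

# input_tensors[0] is an FDTensor; the Triton model converts it with
# to_dlpack() so the runtime model can consume it without an extra copy.
```
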
+ """ + print('Cleaning up...') diff --git a/examples/vision/segmentation/paddleseg/serving/models/preprocess/config.pbtxt b/examples/vision/segmentation/paddleseg/serving/models/preprocess/config.pbtxt new file mode 100644 index 000000000..01cb94869 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/models/preprocess/config.pbtxt @@ -0,0 +1,34 @@ +name: "preprocess" +backend: "python" + +input [ + { + name: "preprocess_input" + data_type: TYPE_UINT8 + dims: [-1, -1, -1, 3 ] + } +] + +output [ + { + name: "preprocess_output_1" + data_type: TYPE_FP32 + dims: [-1, 3, -1, -1 ] + }, + { + name: "preprocess_output_2" + data_type: TYPE_STRING + dims: [ -1] + } +] + +instance_group [ + { + # The number of instances is 1 + count: 1 + # Use CPU, GPU inference option is:KIND_GPU + kind: KIND_CPU + # The instance is deployed on the 0th GPU card + # gpus: [0] + } +] diff --git a/examples/vision/segmentation/paddleseg/serving/models/runtime/1/README.md b/examples/vision/segmentation/paddleseg/serving/models/runtime/1/README.md new file mode 100644 index 000000000..1e5d914b4 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/models/runtime/1/README.md @@ -0,0 +1,5 @@ +# Runtime Directory + +This directory holds the model files. +Paddle models must be model.pdmodel and model.pdiparams files. +ONNX models must be model.onnx files. diff --git a/examples/vision/segmentation/paddleseg/serving/models/runtime/config.pbtxt b/examples/vision/segmentation/paddleseg/serving/models/runtime/config.pbtxt new file mode 100644 index 000000000..bd145c590 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/models/runtime/config.pbtxt @@ -0,0 +1,60 @@ +# optional, If name is specified it must match the name of the model repository directory containing the model. 
+name: "runtime" +backend: "fastdeploy" + +# Input configuration of the model +input [ + { + # input name + name: "x" + # input type such as TYPE_FP32、TYPE_UINT8、TYPE_INT8、TYPE_INT16、TYPE_INT32、TYPE_INT64、TYPE_FP16、TYPE_STRING + data_type: TYPE_FP32 + # input shape + dims: [-1, 3, -1, -1 ] + } +] + +# The output of the model is configured in the same format as the input +output [ + { + name: "argmax_0.tmp_0" + data_type: TYPE_INT32 + dims: [ -1, -1, -1 ] + } +] + +# Number of instances of the model +instance_group [ + { + # The number of instances is 1 + count: 1 + # Use GPU, CPU inference option is:KIND_CPU + kind: KIND_GPU + # The instance is deployed on the 0th GPU card + gpus: [0] + } +] + +optimization { + execution_accelerators { + gpu_execution_accelerator : [ { + # use TRT engine + name: "paddle", + #name: "tensorrt", + # use fp16 on TRT engine + parameters { key: "precision" value: "trt_fp32" } + }, + { + name: "min_shape" + parameters { key: "x" value: "1 3 256 256" } + }, + { + name: "opt_shape" + parameters { key: "x" value: "1 3 1024 1024" } + }, + { + name: "max_shape" + parameters { key: "x" value: "16 3 2048 2048" } + } + ] +}} From 1a3d0f86217feebd6d006b6091e44638ad7df9c0 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 18 Jan 2023 13:23:50 +0000 Subject: [PATCH 09/30] Add PaddleSeg simple serving example --- .../paddleseg/python/serving/README.md | 36 ++++++++++++++++++ .../paddleseg/python/serving/README_CN.md | 36 ++++++++++++++++++ .../paddleseg/python/serving/client.py | 23 +++++++++++ .../paddleseg/python/serving/server.py | 38 +++++++++++++++++++ 4 files changed, 133 insertions(+) create mode 100644 examples/vision/segmentation/paddleseg/python/serving/README.md create mode 100644 examples/vision/segmentation/paddleseg/python/serving/README_CN.md create mode 100644 examples/vision/segmentation/paddleseg/python/serving/client.py create mode 100644 examples/vision/segmentation/paddleseg/python/serving/server.py diff --git a/examples/vision/segmentation/paddleseg/python/serving/README.md b/examples/vision/segmentation/paddleseg/python/serving/README.md new file mode 100644 index 000000000..da41a3a00 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/python/serving/README.md @@ -0,0 +1,36 @@ +English | [简体中文](README_CN.md) + +# PaddleSegmentation Python Simple Serving Demo + + +## Environment + +- 1. Prepare environment and install FastDeploy Python whl, refer to [download_prebuilt_libraries](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md) + +Server: +```bash +# Download demo code +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving + +# Download PP_LiteSeg model +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz + +# Launch server, change the configurations in server.py to select hardware, backend, etc. 
+# and use --host, --port to specify IP and port +fastdeploy simple_serving --app server:app +``` + +Client: +```bash +# Download demo code +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving + +# Download test image +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +# Send request and get inference result (Please adapt the IP and port if necessary) +python client.py +``` diff --git a/examples/vision/segmentation/paddleseg/python/serving/README_CN.md b/examples/vision/segmentation/paddleseg/python/serving/README_CN.md new file mode 100644 index 000000000..3f382c904 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/python/serving/README_CN.md @@ -0,0 +1,36 @@ +简体中文 | [English](README.md) + +# PaddleSegmentation Python轻量服务化部署示例 + +在部署前,需确认以下两个步骤 + +- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) +- 2. FastDeploy Python whl包安装,参考[FastDeploy Python安装](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) + +服务端: +```bash +# 下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving + +# 下载PP_LiteSeg模型文件 +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz + +# 启动服务,可修改server.py中的配置项来指定硬件、后端等 +# 可通过--host、--port指定IP和端口号 +fastdeploy simple_serving --app server:app +``` + +客户端: +```bash +# 下载部署示例代码 +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving + +# 下载测试图片 +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +# 请求服务,获取推理结果(如有必要,请修改脚本中的IP和端口号) +python client.py +``` diff --git a/examples/vision/segmentation/paddleseg/python/serving/client.py b/examples/vision/segmentation/paddleseg/python/serving/client.py new file mode 100644 index 000000000..e652c4462 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/python/serving/client.py @@ -0,0 +1,23 @@ +import requests +import json +import cv2 +import fastdeploy as fd +from fastdeploy.serving.utils import cv2_to_base64 + +if __name__ == '__main__': + url = "http://127.0.0.1:8000/fd/ppliteseg" + headers = {"Content-Type": "application/json"} + + im = cv2.imread("cityscapes_demo.png") + data = {"data": {"image": cv2_to_base64(im)}, "parameters": {}} + + resp = requests.post(url=url, headers=headers, data=json.dumps(data)) + if resp.status_code == 200: + r_json = json.loads(resp.json()["result"]) + result = fd.vision.utils.json_to_segmentation(r_json) + vis_im = fd.vision.vis_segmentation(im, result, weight=0.5) + cv2.imwrite("visualized_result.jpg", vis_im) + print("Visualized result saved in ./visualized_result.jpg") + else: + print("Error code:", resp.status_code) + print(resp.text) diff --git a/examples/vision/segmentation/paddleseg/python/serving/server.py b/examples/vision/segmentation/paddleseg/python/serving/server.py new file mode 100644 index 000000000..2ae2df09c --- /dev/null +++ b/examples/vision/segmentation/paddleseg/python/serving/server.py @@ -0,0 +1,38 @@ +import fastdeploy as fd +from fastdeploy.serving.server import SimpleServer +import os +import logging + +logging.getLogger().setLevel(logging.INFO) + +# Configurations +model_dir = 'PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer' +device = 'cpu' +use_trt = False + +# Prepare model
+model_file = os.path.join(model_dir, "model.pdmodel") +params_file = os.path.join(model_dir, "model.pdiparams") +config_file = os.path.join(model_dir, "deploy.yaml") + +# Setup runtime option to select hardware, backend, etc. +option = fd.RuntimeOption() +if device.lower() == 'gpu': + option.use_gpu() +if use_trt: + option.use_trt_backend() + option.set_trt_cache_file('pp_lite_seg.trt') + +# Create model instance +model_instance = fd.vision.segmentation.PaddleSegModel( + model_file=model_file, + params_file=params_file, + config_file=config_file, + runtime_option=option) + +# Create server, setup REST API +app = SimpleServer() +app.register( + task_name="fd/ppliteseg", + model_handler=fd.serving.handler.VisionModelHandler, + predictor=model_instance) From 03fd1aa5cff4cb8adaae1c8a29aed404a38f2283 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Wed, 18 Jan 2023 13:25:00 +0000 Subject: [PATCH 10/30] Add PaddleSeg triton serving client code --- .../serving/paddleseg_grpc_client.py | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 examples/vision/segmentation/paddleseg/serving/paddleseg_grpc_client.py diff --git a/examples/vision/segmentation/paddleseg/serving/paddleseg_grpc_client.py b/examples/vision/segmentation/paddleseg/serving/paddleseg_grpc_client.py new file mode 100644 index 000000000..1fe3828db --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/paddleseg_grpc_client.py @@ -0,0 +1,116 @@ +import logging +import numpy as np +import time +from typing import Optional +import cv2 +import json + +from tritonclient import utils as client_utils +from tritonclient.grpc import InferenceServerClient, InferInput, InferRequestedOutput, service_pb2_grpc, service_pb2 + +LOGGER = logging.getLogger("run_inference_on_triton") + + +class SyncGRPCTritonRunner: + DEFAULT_MAX_RESP_WAIT_S = 120 + + def __init__( + self, + server_url: str, + model_name: str, + model_version: str, + *, + verbose=False, + resp_wait_s: Optional[float]=None, ): + self._server_url = server_url + self._model_name = model_name + self._model_version = model_version + self._verbose = verbose + self._response_wait_t = self.DEFAULT_MAX_RESP_WAIT_S if resp_wait_s is None else resp_wait_s + + self._client = InferenceServerClient( + self._server_url, verbose=self._verbose) + error = self._verify_triton_state(self._client) + if error: + raise RuntimeError( + f"Could not communicate to Triton Server: {error}") + + LOGGER.debug( + f"Triton server {self._server_url} and model {self._model_name}:{self._model_version} " + f"are up and ready!") + + model_config = self._client.get_model_config(self._model_name, + self._model_version) + model_metadata = self._client.get_model_metadata(self._model_name, + self._model_version) + LOGGER.info(f"Model config {model_config}") + LOGGER.info(f"Model metadata {model_metadata}") + + for tm in model_metadata.inputs: + print("tm:", tm) + self._inputs = {tm.name: tm for tm in model_metadata.inputs} + self._input_names = list(self._inputs) + self._outputs = {tm.name: tm for tm in model_metadata.outputs} + self._output_names = list(self._outputs) + self._outputs_req = [ + InferRequestedOutput(name) for name in self._outputs + ] + + def Run(self, inputs): + """ + Args: + inputs: list, Each value corresponds to an input name of self._input_names + Returns: + results: dict, {name : numpy.array} + """ + infer_inputs = [] + for idx, data in enumerate(inputs): + infer_input = InferInput(self._input_names[idx], data.shape, + "UINT8") + 
infer_input.set_data_from_numpy(data) + infer_inputs.append(infer_input) + + results = self._client.infer( + model_name=self._model_name, + model_version=self._model_version, + inputs=infer_inputs, + outputs=self._outputs_req, + client_timeout=self._response_wait_t, ) + results = {name: results.as_numpy(name) for name in self._output_names} + return results + + def _verify_triton_state(self, triton_client): + if not triton_client.is_server_live(): + return f"Triton server {self._server_url} is not live" + elif not triton_client.is_server_ready(): + return f"Triton server {self._server_url} is not ready" + elif not triton_client.is_model_ready(self._model_name, + self._model_version): + return f"Model {self._model_name}:{self._model_version} is not ready" + return None + + +if __name__ == "__main__": + model_name = "paddleseg" + model_version = "1" + url = "localhost:8001" + runner = SyncGRPCTritonRunner(url, model_name, model_version) + im = cv2.imread("cityscapes_demo.png") + im = np.array([im, ]) + # batch input + # im = np.array([im, im, im]) + for i in range(1): + result = runner.Run([im, ]) + for name, values in result.items(): + print("output_name:", name) + # values is batch + for value in values: + value = json.loads(value) + #print(value) + import fastdeploy as fd + result = fd.C.vision.SegmentationResult() + result.label_map = value["label_map"] + result.shape = value["shape"] + im = cv2.imread("cityscapes_demo.png") + vis_im = fd.vision.vis_segmentation(im, result, 0.5) + cv2.imwrite("seg.png", vis_im) From 9d30be35a49628c36bf601c0e09ca31edbac5d69 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 19 Jan 2023 03:37:14 +0000 Subject: [PATCH 11/30] Update triton serving runtime config.pbtxt --- .../paddleseg/serving/models/runtime/config.pbtxt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/vision/segmentation/paddleseg/serving/models/runtime/config.pbtxt b/examples/vision/segmentation/paddleseg/serving/models/runtime/config.pbtxt index bd145c590..875086e2b 100644 --- a/examples/vision/segmentation/paddleseg/serving/models/runtime/config.pbtxt +++ b/examples/vision/segmentation/paddleseg/serving/models/runtime/config.pbtxt @@ -1,6 +1,7 @@ # optional, If name is specified it must match the name of the model repository directory containing the model. 
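+# (here the model directory is models/runtime, so the name below is "runtime")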
name: "runtime" backend: "fastdeploy" +max_batch_size: 1 # Input configuration of the model input [ @@ -10,7 +11,7 @@ input [ # input type such as TYPE_FP32、TYPE_UINT8、TYPE_INT8、TYPE_INT16、TYPE_INT32、TYPE_INT64、TYPE_FP16、TYPE_STRING data_type: TYPE_FP32 # input shape - dims: [-1, 3, -1, -1 ] + dims: [3, -1, -1 ] } ] @@ -19,7 +20,7 @@ output [ { name: "argmax_0.tmp_0" data_type: TYPE_INT32 - dims: [ -1, -1, -1 ] + dims: [ -1, -1 ] } ] @@ -39,9 +40,8 @@ optimization { execution_accelerators { gpu_execution_accelerator : [ { # use TRT engine - name: "paddle", - #name: "tensorrt", - # use fp16 on TRT engine + name: "tensorrt", + # use fp32 on TRT engine parameters { key: "precision" value: "trt_fp32" } }, { @@ -54,7 +54,7 @@ optimization { }, { name: "max_shape" - parameters { key: "x" value: "16 3 2048 2048" } + parameters { key: "x" value: "1 3 2048 2048" } } ] }} From 7d145b5be386b98a4c5b709b6b57d89ca8a37bd5 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 19 Jan 2023 06:26:04 +0000 Subject: [PATCH 12/30] Update paddleseg grpc client --- .../paddleseg/serving/paddleseg_grpc_client.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/examples/vision/segmentation/paddleseg/serving/paddleseg_grpc_client.py b/examples/vision/segmentation/paddleseg/serving/paddleseg_grpc_client.py index 1fe3828db..f200dec25 100644 --- a/examples/vision/segmentation/paddleseg/serving/paddleseg_grpc_client.py +++ b/examples/vision/segmentation/paddleseg/serving/paddleseg_grpc_client.py @@ -106,11 +106,7 @@ if __name__ == "__main__": # values is batch for value in values: value = json.loads(value) - #print(value) - import fastdeploy as fd - result = fd.C.vision.SegmentationResult() - result.label_map = value["label_map"] - result.shape = value["shape"] - im = cv2.imread("cityscapes_demo.png") - vis_im = fd.vision.vis_segmentation(im, result, 0.5) - cv2.imwrite("seg.png", vis_im) + print( + "Only print the first 20 labels in label_map of SEG_RESULT") + value["label_map"] = value["label_map"][:20] + print(value) From 175c6b0f39b4eeb4e63082bff7c6ed7bce2e0624 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 19 Jan 2023 06:27:07 +0000 Subject: [PATCH 13/30] Add paddle serving README --- .../segmentation/paddleseg/serving/README.md | 62 +++++++++++++++++ .../paddleseg/serving/README_CN.md | 68 +++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 examples/vision/segmentation/paddleseg/serving/README.md create mode 100644 examples/vision/segmentation/paddleseg/serving/README_CN.md diff --git a/examples/vision/segmentation/paddleseg/serving/README.md b/examples/vision/segmentation/paddleseg/serving/README.md new file mode 100644 index 000000000..a451e8730 --- /dev/null +++ b/examples/vision/segmentation/paddleseg/serving/README.md @@ -0,0 +1,62 @@ +English | [简体中文](README_CN.md) +# PaddleSegmentation Serving Deployment Demo + +## Launch Serving + +```bash +# Download demo code +git clone https://github.com/PaddlePaddle/FastDeploy.git +cd FastDeploy/examples/vision/segmentation/paddleseg/serving + +#Download PP_LiteSeg model file +wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz +tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz + +# Move the model files to models/infer/1 +mv yolov5s.onnx models/infer/1/ + +# Pull fastdeploy image, x.y.z is FastDeploy version, example 1.0.2. +docker pull paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 + +# Run the docker. 
The docker name is fd_serving, and the current directory is mounted as the docker's /serving directory
+nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
+
+# Start the service (Without setting the CUDA_VISIBLE_DEVICES environment variable, it will have scheduling privileges for all GPU cards)
+CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models --backend-config=python,shm-default-byte-size=10485760
+```
+
+The following output indicates that serving has launched successfully
+
+```
+......
+I0928 04:51:15.784517 206 grpc_server.cc:4117] Started GRPCInferenceService at 0.0.0.0:8001
+I0928 04:51:15.785177 206 http_server.cc:2815] Started HTTPService at 0.0.0.0:8000
+I0928 04:51:15.826578 206 http_server.cc:167] Started Metrics Service at 0.0.0.0:8002
+```
+
+## Client Requests
+
+Execute the following commands on the host machine to send a gRPC request and print the result
+
+```
+# Download test image
+wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png
+
+# Install client-side dependencies
+python3 -m pip install tritonclient\[all\]
+
+# Send requests
+python3 paddleseg_grpc_client.py
+```
+
+When the request is sent successfully, the results are returned in JSON format and printed out:
+
+```
+
+```
+
+## Modify Configs
+
+
+
+The default is to run ONNXRuntime on CPU. If developers need to run it on GPU or other inference engines, please see the [Configs File](../../../../../serving/docs/EN/model_configuration-en.md) to modify the configs in `models/runtime/config.pbtxt`.
diff --git a/examples/vision/segmentation/paddleseg/serving/README_CN.md b/examples/vision/segmentation/paddleseg/serving/README_CN.md
new file mode 100644
index 000000000..676272f2e
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/serving/README_CN.md
@@ -0,0 +1,68 @@
+[English](README.md) | 简体中文
+# PaddleSegmentation 服务化部署示例
+
+在服务化部署前,需确认
+
+- 1. 服务化镜像的软硬件环境要求和镜像拉取命令请参考[FastDeploy服务化部署](../../../../../serving/README_CN.md)
+
+
+## 启动服务
+
+```bash
+#下载部署示例代码
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/examples/vision/segmentation/paddleseg/serving
+
+#下载PP_LiteSeg模型文件
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
+tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
+
+# 将模型文件放入 models/runtime/1目录下
+mv PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer/model.pdmodel models/runtime/1/
+mv PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer/model.pdiparams models/runtime/1/
+
+# 拉取fastdeploy镜像(x.y.z为镜像版本号,需参照serving文档替换为数字)
+# GPU镜像
+docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
+# CPU镜像
+docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
+
+# 运行容器,容器名字为 fd_serving, 并挂载当前目录为容器的 /serving 目录
+nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
+
+# 启动服务(不设置CUDA_VISIBLE_DEVICES环境变量,会拥有所有GPU卡的调度权限)
+CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models --backend-config=python,shm-default-byte-size=10485760
+```
+>> **注意**: 当出现"Address already in use", 请使用`--grpc-port`指定端口号来启动服务,同时更改paddleseg_grpc_client.py中的请求端口号

服务启动成功后, 会有以下输出:
```
......
+I0928 04:51:15.784517 206 grpc_server.cc:4117] Started GRPCInferenceService at 0.0.0.0:8001 +I0928 04:51:15.785177 206 http_server.cc:2815] Started HTTPService at 0.0.0.0:8000 +I0928 04:51:15.826578 206 http_server.cc:167] Started Metrics Service at 0.0.0.0:8002 +``` + + +## 客户端请求 + +在物理机器中执行以下命令,发送grpc请求并输出结果 +``` +#下载测试图片 +wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png + +#安装客户端依赖 +python3 -m pip install tritonclient[all] + +# 发送请求 +python3 paddleseg_grpc_client.py +``` + +发送请求成功后,会返回json格式的检测结果并打印输出: +``` + +``` + +## 配置修改 + +当前默认配置在CPU上运行ONNXRuntime引擎, 如果要在GPU或其他推理引擎上运行。 需要修改`models/runtime/config.pbtxt`中配置,详情请参考[配置文档](../../../../../serving/docs/zh_CN/model_configuration.md) From 4b293a89de7de52f40ad1f98e2819488f69d051f Mon Sep 17 00:00:00 2001 From: wwbitejotunn Date: Thu, 9 Feb 2023 05:51:30 +0000 Subject: [PATCH 14/30] fix paddle backend --- fastdeploy/runtime/backends/paddle/paddle_backend.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc index e0e908c36..19493f90b 100644 --- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc +++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc @@ -44,6 +44,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) { "file will save to the directory where paddle model saved." << std::endl; use_static = true; + config_.SetOptimCacheDir(option.trt_option.serialize_file); } config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, option.trt_option.max_batch_size, 3, From 59c5fedc36d7e3dc5538fb385d1201a0aa184df6 Mon Sep 17 00:00:00 2001 From: wwbitejotunn Date: Thu, 9 Feb 2023 12:53:08 +0000 Subject: [PATCH 15/30] set stream by raw size_t --- fastdeploy/runtime/option_pybind.cc | 1 + fastdeploy/runtime/runtime_option.cc | 4 ++++ fastdeploy/runtime/runtime_option.h | 2 ++ 3 files changed, 7 insertions(+) diff --git a/fastdeploy/runtime/option_pybind.cc b/fastdeploy/runtime/option_pybind.cc index 1c786459b..d5a6187c8 100644 --- a/fastdeploy/runtime/option_pybind.cc +++ b/fastdeploy/runtime/option_pybind.cc @@ -48,6 +48,7 @@ void BindOption(pybind11::module& m) { .def_readwrite("poros_option", &RuntimeOption::poros_option) .def_readwrite("paddle_infer_option", &RuntimeOption::paddle_infer_option) .def("set_external_stream", &RuntimeOption::SetExternalStream) + .def("set_external_raw_stream", &RuntimeOption::SetExternalRawStream) .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) .def("use_poros_backend", &RuntimeOption::UsePorosBackend) diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc index 7538f3ea6..97b7acea7 100644 --- a/fastdeploy/runtime/runtime_option.cc +++ b/fastdeploy/runtime/runtime_option.cc @@ -93,6 +93,10 @@ void RuntimeOption::SetExternalStream(void* external_stream) { external_stream_ = external_stream; } +void RuntimeOption::SetExternalRawStream(size_t external_stream) { + external_stream_ = (void*) external_stream; +} + void RuntimeOption::SetCpuThreadNum(int thread_num) { FDASSERT(thread_num > 0, "The thread_num must be greater than 0."); cpu_thread_num = thread_num; diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h index ecb51fe2a..6f21a2860 100644 --- a/fastdeploy/runtime/runtime_option.h +++ b/fastdeploy/runtime/runtime_option.h @@ -111,6 +111,8 @@ struct FASTDEPLOY_DECL RuntimeOption { void SetExternalStream(void* 
external_stream); + void SetExternalRawStream(size_t external_stream); + /* * @brief Set number of cpu threads while inference on CPU, by default it will decided by the different backends */ From 942cee83d7d0c94ed0598bb040e9fe7ec3e7f873 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Thu, 9 Feb 2023 14:04:10 +0000 Subject: [PATCH 16/30] Add decrypt function to load encrypted model --- fastdeploy/runtime/option_pybind.cc | 1 + fastdeploy/runtime/runtime.cc | 26 +++++++++++++++++++++++++- fastdeploy/runtime/runtime.h | 3 +++ fastdeploy/runtime/runtime_option.cc | 9 +++++++++ python/fastdeploy/runtime.py | 7 +++++++ 5 files changed, 45 insertions(+), 1 deletion(-) diff --git a/fastdeploy/runtime/option_pybind.cc b/fastdeploy/runtime/option_pybind.cc index 1c786459b..7af90d831 100644 --- a/fastdeploy/runtime/option_pybind.cc +++ b/fastdeploy/runtime/option_pybind.cc @@ -35,6 +35,7 @@ void BindOption(pybind11::module& m) { .def(pybind11::init()) .def("set_model_path", &RuntimeOption::SetModelPath) .def("set_model_buffer", &RuntimeOption::SetModelBuffer) + .def("set_encryption_key", &RuntimeOption::SetEncryptionKey) .def("use_gpu", &RuntimeOption::UseGpu) .def("use_cpu", &RuntimeOption::UseCpu) .def("use_rknpu2", &RuntimeOption::UseRKNPU2) diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc index 70714e4f0..2a00dfda4 100644 --- a/fastdeploy/runtime/runtime.cc +++ b/fastdeploy/runtime/runtime.cc @@ -104,7 +104,31 @@ bool AutoSelectBackend(RuntimeOption& option) { bool Runtime::Init(const RuntimeOption& _option) { option = _option; - + if ("" != option.encryption_key_) { + #ifdef ENABLE_ENCRYPTION + if (option.model_from_memory_) { + option.model_file = Decrypt(option.model_file, option.encryption_key_); + if (!(option.params_file.empty())) { + option.params_file = Decrypt(option.params_file, option.encryption_key_); + } + } else { + std::string model_buffer = ""; + FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), + "Fail to read binary from model file"); + option.model_file = Decrypt(model_buffer, option.encryption_key_); + if (!(option.params_file.empty())) { + std::string params_buffer = ""; + FDASSERT(ReadBinaryFromFile(option.params_file, ¶ms_buffer), + "Fail to read binary from parameter file"); + option.params_file = Decrypt(params_buffer, option.encryption_key_); + } + option.model_from_memory_ = true; + } + #else + FDERROR << "The FastDeploy didn't compile with encryption function." 
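+            // (reached when FastDeploy was built without the
+            // ENABLE_ENCRYPTION compile definition checked above)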
+ << std::endl; + #endif + } // Choose default backend by model format and device if backend is not // specified if (option.backend == Backend::UNKNOWN) { diff --git a/fastdeploy/runtime/runtime.h b/fastdeploy/runtime/runtime.h index 6e7dc9629..772773007 100755 --- a/fastdeploy/runtime/runtime.h +++ b/fastdeploy/runtime/runtime.h @@ -23,6 +23,9 @@ #include "fastdeploy/core/fd_tensor.h" #include "fastdeploy/runtime/runtime_option.h" #include "fastdeploy/utils/perf.h" +#ifdef ENABLE_ENCRYPTION + #include "fastdeploy/encryption/include/decrypt.h" +#endif /** \brief All C++ FastDeploy APIs are defined inside this namespace * diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc index c09352d58..8568b3b7f 100644 --- a/fastdeploy/runtime/runtime_option.cc +++ b/fastdeploy/runtime/runtime_option.cc @@ -36,6 +36,15 @@ void RuntimeOption::SetModelBuffer(const std::string& model_buffer, model_from_memory_ = true; } +void RuntimeOption::SetEncryptionKey(const std::string& encryption_key) { + #ifdef ENABLE_ENCRYPTION + encryption_key_ = encryption_key; + #else + FDERROR << "The FastDeploy didn't compile with encryption function." + << std::endl; + #endif +} + void RuntimeOption::UseGpu(int gpu_id) { #ifdef WITH_GPU device = Device::GPU; diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py index cd7b6641b..1d2fc1c1d 100644 --- a/python/fastdeploy/runtime.py +++ b/python/fastdeploy/runtime.py @@ -187,6 +187,13 @@ class RuntimeOption: return self._option.set_model_buffer(model_buffer, params_buffer, model_format) + def set_encryption_key(self, + encryption_key): + """When loading encrypted model, encryption_key is required to decrypte model + :param encryption_key: (str)The key for decrypting model + """ + return self._option.set_encryption_key(encryption_key) + def use_gpu(self, device_id=0): """Inference with Nvidia GPU From 5160771a1c95035fedc608fb4ab4325f82459f05 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Fri, 10 Feb 2023 03:24:45 +0000 Subject: [PATCH 17/30] Update runtime_option.h --- fastdeploy/runtime/runtime_option.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h index 0aa6bbec8..b263f4ad3 100644 --- a/fastdeploy/runtime/runtime_option.h +++ b/fastdeploy/runtime/runtime_option.h @@ -59,6 +59,12 @@ struct FASTDEPLOY_DECL RuntimeOption { const std::string& params_buffer = "", const ModelFormat& format = ModelFormat::PADDLE); + /** \brief When loading encrypted model, encryption_key is required to decrypte model + * + * \param[in] encryption_key The key for decrypting model + */ + void SetEncryptionKey(const std::string& encryption_key); + /// Use cpu to inference, the runtime will inference on CPU by default void UseCpu(); /// Use Nvidia GPU to inference @@ -178,6 +184,8 @@ struct FASTDEPLOY_DECL RuntimeOption { /// format of input model ModelFormat model_format = ModelFormat::PADDLE; + std::string encryption_key_ = ""; + // for cpu inference // default will let the backend choose their own default value int cpu_thread_num = -1; From 9d8b08415570d7cbb14f08c1025a52d0276d974f Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Fri, 10 Feb 2023 05:06:20 +0000 Subject: [PATCH 18/30] Update tutorials for encryption model --- tutorials/encrypt_model/README.md | 46 ++++++++++++++++++++++++++ tutorials/encrypt_model/README_CN.md | 48 ++++++++++++++++++++++++++++ tutorials/encrypt_model/encrypt.py | 33 +++++++++++++++++++ 3 files 
changed, 127 insertions(+)
 create mode 100644 tutorials/encrypt_model/README.md
 create mode 100644 tutorials/encrypt_model/README_CN.md
 create mode 100644 tutorials/encrypt_model/encrypt.py

diff --git a/tutorials/encrypt_model/README.md b/tutorials/encrypt_model/README.md
new file mode 100644
index 000000000..8a49c107c
--- /dev/null
+++ b/tutorials/encrypt_model/README.md
@@ -0,0 +1,46 @@
+English | [中文](README_CN.md)
+
+# Generate an encrypted model with FastDeploy
+
+This directory provides `encrypt.py` to quickly complete the encryption of the model and parameter files of ResNet50_vd
+
+## encryption
+```bash
+# Download deployment example code 
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/tutorials/encrypt_model
+
+# Download the ResNet50_vd model file
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
+tar -xvf ResNet50_vd_infer.tgz
+
+python encrypt.py --model ResNet50_vd_infer
+```
+>> **Note** After the encryption is completed, the ResNet50_vd_infer_encrypt folder will be generated, including `__model__.encrypted`, `__params__.encrypted`, `encryption_key.txt` three files, where `encryption_key.txt` contains the encrypted key. At the same time, you need to copy the `inference_cls.yaml` configuration file in the original folder to the ResNet50_vd_infer_encrypt folder for subsequent deployment
+
+### Python encryption interface
+
+Use the encryption interface through the following settings:
+```python
+import fastdeploy as fd
+import os
+# when key is not given, key will be automatically generated.
+# otherwise, the file will be encrypted by specific key +encrypted_model, key = fd.encryption.encrypt(model_file.read()) +encrypted_params, key= fd.encryption.encrypt(params_file.read(), key) +``` + +### FastDeploy 部署加密模型 + +通过如下接口的设定,完成加密模型的推理 +```python +import fastdeploy as fd +option = fd.RuntimeOption() +option.set_encryption_key(key) +``` + +```C++ +fastdeploy::RuntimeOption option; +option.SetEncryptionKey(key) +``` +>> **注意** RuntimeOption的更多详细信息,请参考[RuntimeOption Python文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/runtime_option.html),[RuntimeOption C++文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/structfastdeploy_1_1RuntimeOption.html) \ No newline at end of file diff --git a/tutorials/encrypt_model/encrypt.py b/tutorials/encrypt_model/encrypt.py new file mode 100644 index 000000000..380509042 --- /dev/null +++ b/tutorials/encrypt_model/encrypt.py @@ -0,0 +1,33 @@ +import fastdeploy as fd +import os + +def parse_arguments(): + import argparse + import ast + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", required=True, help="Path of model directory.") + return parser.parse_args() + +if __name__ == "__main__": + args = parse_arguments() + model_file = os.path.join(args.model, "inference.pdmodel") + params_file = os.path.join(args.model, "inference.pdiparams") + config_file = os.path.join(args.model, "inference_cls.yaml") + model_buffer = open(model_file, 'rb') + params_buffer = open(params_file, 'rb') + encrypted_model, key = fd.encryption.encrypt(model_buffer.read()) + encrypted_params, key= fd.encryption.encrypt(params_buffer.read(), key) + encrypted_model_dir = args.model + "_encrypt" + model_buffer.close() + params_buffer.close() + os.mkdir(encrypted_model_dir) + with open(os.path.join(encrypted_model_dir, "__model__.encrypted"), "w") as f: + f.write(encrypted_model) + + with open(os.path.join(encrypted_model_dir, "__params__.encrypted"), "w") as f: + f.write(encrypted_params) + + with open(os.path.join(encrypted_model_dir, "encryption_key.txt"), "w") as f: + f.write(key) + print("encryption success") \ No newline at end of file From bf61f84d22177b8fce47036d0fa89723d0666189 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Mon, 13 Feb 2023 02:35:55 +0000 Subject: [PATCH 19/30] Add some args in encrypt.py --- tutorials/encrypt_model/README.md | 6 ++--- tutorials/encrypt_model/README_CN.md | 4 +-- tutorials/encrypt_model/encrypt.py | 38 +++++++++++++++++++--------- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/tutorials/encrypt_model/README.md b/tutorials/encrypt_model/README.md index 8a49c107c..755671686 100644 --- a/tutorials/encrypt_model/README.md +++ b/tutorials/encrypt_model/README.md @@ -6,7 +6,7 @@ This directory provides `encrypt.py` to quickly complete the encryption of the m ## encryption ```bash -# Download deployment example code +# Download deployment example code git clone https://github.com/PaddlePaddle/FastDeploy.git cd FastDeploy/tutorials/encrypt_model @@ -14,7 +14,7 @@ cd FastDeploy/tutorials/encrypt_model wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz tar -xvf ResNet50_vd_infer.tgz -python encrypt.py --model ResNet50_vd_infer +python encrypt.py --model_file ResNet50_vd_infer/inference.pdmodel --params_file ResNet50_vd_infer/inference.pdiparams --encrypted_model_dir ResNet50_vd_infer_encrypt ``` >> **Note** After the encryption is completed, the ResNet50_vd_infer_encrypt folder will be generated, including `__model__.encrypted`, 
`__params__.encrypted`, `encryption_key.txt` three files, where `encryption_key.txt` contains the encrypted key. At the same time, you need to copy the `inference_cls.yaml` configuration file in the original folder to the ResNet50_vd_infer_encrypt folder for subsequent deployment @@ -43,4 +43,4 @@ option.set_encryption_key(key) fastdeploy::RuntimeOption option; option.SetEncryptionKey(key) ``` ->> **Note** For more details about RuntimeOption, please refer to [RuntimeOption Python Documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/runtime_option.html), [ RuntimeOption C++ Documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/structfastdeploy_1_1RuntimeOption.html) \ No newline at end of file +>> **Note** For more details about RuntimeOption, please refer to [RuntimeOption Python Documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/runtime_option.html), [ RuntimeOption C++ Documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/structfastdeploy_1_1RuntimeOption.html) diff --git a/tutorials/encrypt_model/README_CN.md b/tutorials/encrypt_model/README_CN.md index 8230f68e6..c2f80ffd4 100644 --- a/tutorials/encrypt_model/README_CN.md +++ b/tutorials/encrypt_model/README_CN.md @@ -16,7 +16,7 @@ cd FastDeploy/tutorials/encrypt_model wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz tar -xvf ResNet50_vd_infer.tgz -python encrypt.py --model ResNet50_vd_infer +python encrypt.py --model_file ResNet50_vd_infer/inference.pdmodel --params_file ResNet50_vd_infer/inference.pdiparams --encrypted_model_dir ResNet50_vd_infer_encrypt ``` >> **注意** 加密完成后会生成ResNet50_vd_infer_encrypt文件夹,包含`__model__.encrypted`,`__params__.encrypted`,`encryption_key.txt`三个文件,其中`encryption_key.txt`包含加密后的秘钥,同时需要将原文件夹中的、`inference_cls.yaml`配置文件 拷贝至ResNet50_vd_infer_encrypt文件夹,以便后续部署使用 @@ -45,4 +45,4 @@ option.set_encryption_key(key) fastdeploy::RuntimeOption option; option.SetEncryptionKey(key) ``` ->> **注意** RuntimeOption的更多详细信息,请参考[RuntimeOption Python文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/runtime_option.html),[RuntimeOption C++文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/structfastdeploy_1_1RuntimeOption.html) \ No newline at end of file +>> **注意** RuntimeOption的更多详细信息,请参考[RuntimeOption Python文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/runtime_option.html),[RuntimeOption C++文档](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/structfastdeploy_1_1RuntimeOption.html) diff --git a/tutorials/encrypt_model/encrypt.py b/tutorials/encrypt_model/encrypt.py index 380509042..f4d80ed2f 100644 --- a/tutorials/encrypt_model/encrypt.py +++ b/tutorials/encrypt_model/encrypt.py @@ -1,33 +1,47 @@ import fastdeploy as fd import os + def parse_arguments(): import argparse import ast parser = argparse.ArgumentParser() parser.add_argument( - "--model", required=True, help="Path of model directory.") + "--encrypted_model_dir", + required=False, + help="Path of model directory.") + parser.add_argument( + "--model_file", required=True, help="Path of model file directory.") + parser.add_argument( + "--params_file", + required=True, + help="Path of parameters file directory.") return parser.parse_args() + if __name__ == "__main__": args = parse_arguments() - model_file = os.path.join(args.model, "inference.pdmodel") - params_file = os.path.join(args.model, "inference.pdiparams") - config_file = os.path.join(args.model, "inference_cls.yaml") - model_buffer = 
open(model_file, 'rb') - params_buffer = open(params_file, 'rb') + model_buffer = open(args.model_file, 'rb') + params_buffer = open(args.params_file, 'rb') encrypted_model, key = fd.encryption.encrypt(model_buffer.read()) - encrypted_params, key= fd.encryption.encrypt(params_buffer.read(), key) - encrypted_model_dir = args.model + "_encrypt" + # use the same key to encrypt parameter file + encrypted_params, key = fd.encryption.encrypt(params_buffer.read(), key) + encrypted_model_dir = "encrypt_model_dir" + if args.encrypted_model_dir: + encrypted_model_dir = args.encrypted_model_dir model_buffer.close() params_buffer.close() os.mkdir(encrypted_model_dir) - with open(os.path.join(encrypted_model_dir, "__model__.encrypted"), "w") as f: + with open(os.path.join(encrypted_model_dir, "__model__.encrypted"), + "w") as f: f.write(encrypted_model) - with open(os.path.join(encrypted_model_dir, "__params__.encrypted"), "w") as f: + with open(os.path.join(encrypted_model_dir, "__params__.encrypted"), + "w") as f: f.write(encrypted_params) - with open(os.path.join(encrypted_model_dir, "encryption_key.txt"), "w") as f: + with open(os.path.join(encrypted_model_dir, "encryption_key.txt"), + "w") as f: f.write(key) - print("encryption success") \ No newline at end of file + print("encryption key: ", key) + print("encryption success") From abfa9fd850025d4c94d45578f72f3aa51124f06e Mon Sep 17 00:00:00 2001 From: wwbitejotunn Date: Mon, 13 Feb 2023 03:11:31 +0000 Subject: [PATCH 20/30] prebind output by shareExternalData --- fastdeploy/pybind/fd_tensor.cc | 93 +++++++++++++------ fastdeploy/pybind/runtime.cc | 1 + .../runtime/backends/paddle/paddle_backend.cc | 38 ++++++-- .../runtime/backends/paddle/paddle_backend.h | 5 +- fastdeploy/runtime/backends/paddle/util.cc | 37 ++++++++ fastdeploy/runtime/runtime.cc | 20 ++++ fastdeploy/runtime/runtime.h | 8 +- python/fastdeploy/runtime.py | 11 ++- 8 files changed, 174 insertions(+), 39 deletions(-) diff --git a/fastdeploy/pybind/fd_tensor.cc b/fastdeploy/pybind/fd_tensor.cc index 0f1d145b3..6e34019f5 100644 --- a/fastdeploy/pybind/fd_tensor.cc +++ b/fastdeploy/pybind/fd_tensor.cc @@ -15,9 +15,9 @@ #include #include "fastdeploy/core/fd_type.h" -#include "fastdeploy/utils/utils.h" #include "fastdeploy/fastdeploy_model.h" #include "fastdeploy/pybind/main.h" +#include "fastdeploy/utils/utils.h" namespace fastdeploy { @@ -68,8 +68,8 @@ DLDataType FDToDlpackType(FDDataType fd_dtype) { break; default: - FDASSERT(false, - "Convert to DlPack, FDType \"%s\" is not supported.", Str(fd_dtype).c_str()); + FDASSERT(false, "Convert to DlPack, FDType \"%s\" is not supported.", + Str(fd_dtype).c_str()); } dl_dtype.code = dl_code; @@ -77,10 +77,8 @@ DLDataType FDToDlpackType(FDDataType fd_dtype) { return dl_dtype; } -FDDataType -DlpackToFDType(const DLDataType& data_type) { - FDASSERT(data_type.lanes == 1, - "FDTensor does not support dlpack lanes != 1") +FDDataType DlpackToFDType(const DLDataType& data_type) { + FDASSERT(data_type.lanes == 1, "FDTensor does not support dlpack lanes != 1") if (data_type.code == DLDataTypeCode::kDLFloat) { if (data_type.bits == 16) { @@ -152,7 +150,7 @@ pybind11::capsule FDTensorToDLPack(FDTensor& fd_tensor) { dlpack_tensor->dl_tensor.dtype = FDToDlpackType(fd_tensor.dtype); dlpack_tensor->dl_tensor.device.device_id = fd_tensor.device_id; - if(fd_tensor.device == Device::GPU) { + if (fd_tensor.device == Device::GPU) { if (fd_tensor.is_pinned_memory) { dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCUDAHost; } else { @@ -162,8 +160,8 
@@ pybind11::capsule FDTensorToDLPack(FDTensor& fd_tensor) { dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCPU; } - return pybind11::capsule( - static_cast(dlpack_tensor), "dltensor", &DeleteUnusedDltensor); + return pybind11::capsule(static_cast(dlpack_tensor), "dltensor", + &DeleteUnusedDltensor); } FDTensor FDTensorFromDLPack(const std::string& name, @@ -178,9 +176,8 @@ FDTensor FDTensorFromDLPack(const std::string& name, int64_t* strides = dl_managed_tensor->dl_tensor.strides; int ndim = dl_managed_tensor->dl_tensor.ndim; - std::vector dims( - dl_managed_tensor->dl_tensor.shape, - dl_managed_tensor->dl_tensor.shape + ndim); + std::vector dims(dl_managed_tensor->dl_tensor.shape, + dl_managed_tensor->dl_tensor.shape + ndim); // Check if the input is contiguous and in C order if (strides != nullptr) { @@ -196,8 +193,8 @@ FDTensor FDTensorFromDLPack(const std::string& name, } FDASSERT(is_contiguous_c_order, - "DLPack tensor is not contiguous. Only contiguous DLPack " - "tensors that are stored in C-Order are supported."); + "DLPack tensor is not contiguous. Only contiguous DLPack " + "tensors that are stored in C-Order are supported."); } Device device; @@ -216,21 +213,20 @@ FDTensor FDTensorFromDLPack(const std::string& name, is_pinned_memory = true; break; default: - FDASSERT(false, + FDASSERT( + false, ("DLDevice type " + - std::to_string(dl_managed_tensor->dl_tensor.device.device_type) + - " is not support by Python backend.").c_str()); + std::to_string(dl_managed_tensor->dl_tensor.device.device_type) + + " is not support by Python backend.") + .c_str()); break; } - FDDataType dtype = - DlpackToFDType(dl_managed_tensor->dl_tensor.dtype); + FDDataType dtype = DlpackToFDType(dl_managed_tensor->dl_tensor.dtype); PyCapsule_SetName(dlpack_tensor.ptr(), "used_dlpack"); FDTensor fd_tensor(name); - fd_tensor.SetExternalData( - dims, dtype, memory_ptr, device, device_id - ); + fd_tensor.SetExternalData(dims, dtype, memory_ptr, device, device_id); fd_tensor.is_pinned_memory = is_pinned_memory; return fd_tensor; } @@ -242,15 +238,52 @@ void BindFDTensor(pybind11::module& m) { .def_readonly("shape", &FDTensor::shape) .def_readonly("dtype", &FDTensor::dtype) .def_readonly("device", &FDTensor::device) - .def("numpy", [](FDTensor& self) { - return TensorToPyArray(self); - }) + .def("numpy", [](FDTensor& self) { return TensorToPyArray(self); }) .def("data", &FDTensor::MutableData) - .def("from_numpy", [](FDTensor& self, pybind11::array& pyarray, bool share_buffer = false) { - PyArrayToTensor(pyarray, &self, share_buffer); - }) + .def("from_numpy", + [](FDTensor& self, pybind11::array& pyarray, + bool share_buffer = false) { + PyArrayToTensor(pyarray, &self, share_buffer); + }) + .def("from_external_data", + [](const std::string& name, size_t data_addr, + const std::vector& shape, const std::string& data_type, + const std::string& data_place, int device_id) { + auto fd_data_type = FDDataType::UNKNOWN1; + if (data_type == "FP32") { + fd_data_type = FDDataType::FP32; + } else if (data_type == "FP16") { + fd_data_type = FDDataType::FP16; + } else if (data_type == "INT32") { + fd_data_type = FDDataType::INT32; + } else if (data_type == "INT64") { + fd_data_type = FDDataType::INT64; + } else { + FDASSERT(false, + "FDTensor.from_external_data, datatype \"%s\" is not " + "supported.", + data_type.c_str()); + } + + Device fd_data_place; + if (data_place.find("gpu") != data_place.npos) { + fd_data_place = Device::GPU; + } else { + FDASSERT(false, + ("Device type " + data_place + + " is not 
support by FDTensor.from_external_data.") + .c_str()); + } + void* data_ptr = nullptr; + data_ptr = reinterpret_cast(data_addr); + FDTensor fd_tensor(name); + fd_tensor.SetExternalData(shape, fd_data_type, + static_cast(data_ptr), + fd_data_place, device_id); + return fd_tensor; + }) .def("to_dlpack", &FDTensorToDLPack) - .def("from_dlpack",&FDTensorFromDLPack) + .def("from_dlpack", &FDTensorFromDLPack) .def("print_info", &FDTensor::PrintInfo); } diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc index ca2f4886b..408c3ced2 100644 --- a/fastdeploy/pybind/runtime.cc +++ b/fastdeploy/pybind/runtime.cc @@ -110,6 +110,7 @@ void BindRuntime(pybind11::module& m) { return outputs; }) .def("bind_input_tensor", &Runtime::BindInputTensor) + .def("bind_output_tensor", &Runtime::BindOutputTensor) .def("infer", [](Runtime& self) { self.Infer(); }) .def("get_output_tensor", [](Runtime& self, const std::string& name) { diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc index 19493f90b..49cb2532b 100644 --- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc +++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc @@ -25,6 +25,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) { if (option.device == Device::GPU) { config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id); if (option_.external_stream_) { + FDINFO << "Will use external stream for Paddle Backend." << std::endl; config_.SetExecStream(option_.external_stream_); } if (option.enable_trt) { @@ -47,7 +48,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) { config_.SetOptimCacheDir(option.trt_option.serialize_file); } config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, - option.trt_option.max_batch_size, 3, + option.trt_option.max_batch_size, 20, precision, use_static); SetTRTDynamicShapeToConfig(option); } @@ -124,9 +125,10 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_buffer, "file will save to the directory where paddle model saved." << std::endl; use_static = true; + config_.SetOptimCacheDir(option.trt_option.serialize_file); } config_.EnableTensorRtEngine(option.trt_option.max_workspace_size, - option.trt_option.max_batch_size, 3, + option.trt_option.max_batch_size, 20, paddle_infer::PrecisionType::kInt8, use_static, false); SetTRTDynamicShapeToConfig(option); @@ -223,23 +225,47 @@ bool PaddleBackend::Infer(std::vector& inputs, << inputs_desc_.size() << ")." << std::endl; return false; } + // output share backend memory only support CPU or GPU + if (option_.device == Device::IPU) { + copy_to_fd = true; + } RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN for (size_t i = 0; i < inputs.size(); ++i) { auto handle = predictor_->GetInputHandle(inputs[i].name); ShareTensorFromFDTensor(handle.get(), inputs[i]); } + std::unordered_set prebinded_output_name; + // prebinded output only support for GPU + if (!copy_to_fd) { + for (size_t i = 0; i < (*outputs).size(); ++i) { + auto output_name = (*outputs)[i].name; + // if a output is not prebinded, + // the name of output is expected to be empty. + // We skip here + if (output_name.empty()) { + continue; + } + // Record the prebinded output_name. 
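+      // ShareOutTensorFromFDTensor below hands the FDTensor buffer to the
+      // backend, so inference results are written directly into user memory.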
+ // Those outputs do not need PaddleTensorToFDTensor + // after predictor_.Run() + prebinded_output_name.insert(output_name); + auto handle = predictor_->GetOutputHandle(output_name); + ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]); + } + } RUNTIME_PROFILE_LOOP_BEGIN(1) predictor_->Run(); RUNTIME_PROFILE_LOOP_END - // output share backend memory only support CPU or GPU - if (option_.device == Device::IPU) { - copy_to_fd = true; - } outputs->resize(outputs_desc_.size()); for (size_t i = 0; i < outputs_desc_.size(); ++i) { + // skip prebinded output + if (copy_to_fd == false && + prebinded_output_name.count(outputs_desc_[i].name)) { + continue; + } auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name); if (copy_to_fd) { (*outputs)[i].is_pinned_memory = option_.enable_pinned_memory; diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.h b/fastdeploy/runtime/backends/paddle/paddle_backend.h index 02c430ade..60079fed6 100755 --- a/fastdeploy/runtime/backends/paddle/paddle_backend.h +++ b/fastdeploy/runtime/backends/paddle/paddle_backend.h @@ -35,6 +35,9 @@ paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device); // Share memory buffer with paddle_infer::Tensor from fastdeploy::FDTensor void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, FDTensor& fd_tensor); +void ShareOutTensorFromFDTensor(paddle_infer::Tensor* tensor, + FDTensor& fd_tensor); + // convert paddle_infer::Tensor to fastdeploy::FDTensor // if copy_to_fd is true, copy memory data to FDTensor /// else share memory to FDTensor @@ -89,4 +92,4 @@ class PaddleBackend : public BaseBackend { std::vector inputs_desc_; std::vector outputs_desc_; }; -} // namespace fastdeploy \ No newline at end of file +} // namespace fastdeploy diff --git a/fastdeploy/runtime/backends/paddle/util.cc b/fastdeploy/runtime/backends/paddle/util.cc index f117a49bc..bd7ff0944 100644 --- a/fastdeploy/runtime/backends/paddle/util.cc +++ b/fastdeploy/runtime/backends/paddle/util.cc @@ -61,6 +61,43 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, Str(fd_tensor.dtype).c_str()); } +void ShareOutTensorFromFDTensor(paddle_infer::Tensor* tensor, + FDTensor& fd_tensor) { + std::vector shape(fd_tensor.shape.begin(), fd_tensor.shape.end()); + auto place = ConvertFDDeviceToPlace(fd_tensor.device); + if (fd_tensor.dtype == FDDataType::FP32) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.MutableData()), + shape, place); + } else { + tensor->CopyToCpu(static_cast(fd_tensor.MutableData())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT32) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.MutableData()), + shape, place); + } else { + tensor->CopyToCpu(static_cast(fd_tensor.MutableData())); + } + return; + } else if (fd_tensor.dtype == FDDataType::INT64) { + if (place == paddle_infer::PlaceType::kGPU) { + tensor->ShareExternalData(static_cast(fd_tensor.MutableData()), + shape, place); + } else { + tensor->CopyToCpu(static_cast(fd_tensor.MutableData())); + } + return; + } else if (fd_tensor.dtype == FDDataType::UINT8) { + tensor->ShareExternalData(static_cast(fd_tensor.MutableData()), + shape, paddle_infer::PlaceType::kCPU); + return; + } + FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.", + Str(fd_tensor.dtype).c_str()); +} + void PaddleTensorToFDTensor(std::unique_ptr& tensor, FDTensor* fd_tensor, bool copy_to_fd) { auto fd_dtype = PaddleDataTypeToFD(tensor->type()); diff --git 
a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc index 70714e4f0..3f3ccd031 100644 --- a/fastdeploy/runtime/runtime.cc +++ b/fastdeploy/runtime/runtime.cc @@ -198,6 +198,26 @@ void Runtime::BindInputTensor(const std::string& name, FDTensor& input) { } } +void Runtime::BindOutputTensor(const std::string& name, FDTensor& output) { + bool is_exist = false; + for (auto& t : output_tensors_) { + if (t.name == name) { + // FDWARNING << "The output name [" << name << "] is exist." << std::endl; + is_exist = true; + t.SetExternalData(output.shape, output.dtype, output.MutableData(), + output.device, output.device_id); + break; + } + } + if (!is_exist) { + // FDWARNING << "The output name [" << name << "] don't exist." << + // std::endl; + FDTensor new_tensor(name); + new_tensor.SetExternalData(output.shape, output.dtype, output.MutableData(), + output.device, output.device_id); + output_tensors_.emplace_back(std::move(new_tensor)); + } +} FDTensor* Runtime::GetOutputTensor(const std::string& name) { for (auto& t : output_tensors_) { if (t.name == name) { diff --git a/fastdeploy/runtime/runtime.h b/fastdeploy/runtime/runtime.h index 6e7dc9629..66ce9c94e 100755 --- a/fastdeploy/runtime/runtime.h +++ b/fastdeploy/runtime/runtime.h @@ -72,6 +72,12 @@ struct FASTDEPLOY_DECL Runtime { /** \brief Bind FDTensor by name, no copy and share input memory */ void BindInputTensor(const std::string& name, FDTensor& input); + + /** \brief Bind FDTensor by name, no copy and share output memory. + * Please make share the correctness of tensor shape of output. + */ + void BindOutputTensor(const std::string& name, FDTensor& output); + /** \brief Get output FDTensor by name, no copy and share backend output memory */ FDTensor* GetOutputTensor(const std::string& name); @@ -99,7 +105,7 @@ struct FASTDEPLOY_DECL Runtime { */ double GetProfileTime() { return backend_->benchmark_result_.time_of_runtime; - } + } private: void CreateOrtBackend(); diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py index 47659c98c..1251b955c 100644 --- a/python/fastdeploy/runtime.py +++ b/python/fastdeploy/runtime.py @@ -72,6 +72,14 @@ class Runtime: """ self._runtime.bind_input_tensor(name, fdtensor) + def bind_output_tensor(self, name, fdtensor): + """Bind FDTensor by name, no copy and share output memory + + :param name: (str)The name of output data. + :param fdtensor: (fastdeploy.FDTensor)The output FDTensor. + """ + self._runtime.bind_output_tensor(name, fdtensor) + def zero_copy_infer(self): """No params inference the model. 
@@ -656,7 +664,8 @@ class RuntimeOption: continue if hasattr(getattr(self._option, attr), "__call__"): continue - message += " {} : {}\t\n".format(attr, getattr(self._option, attr)) + message += " {} : {}\t\n".format(attr, + getattr(self._option, attr)) message.strip("\n") message += ")" return message From 45b070e4959c82e0960b83697ce9260a89951832 Mon Sep 17 00:00:00 2001 From: felixhjh <852142024@qq.com> Date: Mon, 13 Feb 2023 06:31:04 +0000 Subject: [PATCH 21/30] Format code --- fastdeploy/runtime/runtime.cc | 46 +++++++++++++++------------- fastdeploy/runtime/runtime.h | 4 +-- fastdeploy/runtime/runtime_option.cc | 12 ++++---- fastdeploy/runtime/runtime_option.h | 2 +- python/fastdeploy/runtime.py | 16 +++++----- 5 files changed, 42 insertions(+), 38 deletions(-) diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc index 2a00dfda4..67774a306 100644 --- a/fastdeploy/runtime/runtime.cc +++ b/fastdeploy/runtime/runtime.cc @@ -104,30 +104,32 @@ bool AutoSelectBackend(RuntimeOption& option) { bool Runtime::Init(const RuntimeOption& _option) { option = _option; + // decrypt encrypted model if ("" != option.encryption_key_) { - #ifdef ENABLE_ENCRYPTION - if (option.model_from_memory_) { - option.model_file = Decrypt(option.model_file, option.encryption_key_); - if (!(option.params_file.empty())) { - option.params_file = Decrypt(option.params_file, option.encryption_key_); - } - } else { - std::string model_buffer = ""; - FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), - "Fail to read binary from model file"); - option.model_file = Decrypt(model_buffer, option.encryption_key_); - if (!(option.params_file.empty())) { - std::string params_buffer = ""; - FDASSERT(ReadBinaryFromFile(option.params_file, ¶ms_buffer), - "Fail to read binary from parameter file"); - option.params_file = Decrypt(params_buffer, option.encryption_key_); - } - option.model_from_memory_ = true; +#ifdef ENABLE_ENCRYPTION + if (option.model_from_memory_) { + option.model_file = Decrypt(option.model_file, option.encryption_key_); + if (!(option.params_file.empty())) { + option.params_file = + Decrypt(option.params_file, option.encryption_key_); } - #else - FDERROR << "The FastDeploy didn't compile with encryption function." - << std::endl; - #endif + } else { + std::string model_buffer = ""; + FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer), + "Fail to read binary from model file"); + option.model_file = Decrypt(model_buffer, option.encryption_key_); + if (!(option.params_file.empty())) { + std::string params_buffer = ""; + FDASSERT(ReadBinaryFromFile(option.params_file, ¶ms_buffer), + "Fail to read binary from parameter file"); + option.params_file = Decrypt(params_buffer, option.encryption_key_); + } + option.model_from_memory_ = true; + } +#else + FDERROR << "The FastDeploy didn't compile with encryption function." 
+ << std::endl; +#endif } // Choose default backend by model format and device if backend is not // specified diff --git a/fastdeploy/runtime/runtime.h b/fastdeploy/runtime/runtime.h index 772773007..fa8b8f198 100755 --- a/fastdeploy/runtime/runtime.h +++ b/fastdeploy/runtime/runtime.h @@ -24,7 +24,7 @@ #include "fastdeploy/runtime/runtime_option.h" #include "fastdeploy/utils/perf.h" #ifdef ENABLE_ENCRYPTION - #include "fastdeploy/encryption/include/decrypt.h" +#include "fastdeploy/encryption/include/decrypt.h" #endif /** \brief All C++ FastDeploy APIs are defined inside this namespace @@ -102,7 +102,7 @@ struct FASTDEPLOY_DECL Runtime { */ double GetProfileTime() { return backend_->benchmark_result_.time_of_runtime; - } + } private: void CreateOrtBackend(); diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc index 8568b3b7f..d074a9603 100644 --- a/fastdeploy/runtime/runtime_option.cc +++ b/fastdeploy/runtime/runtime_option.cc @@ -37,12 +37,12 @@ void RuntimeOption::SetModelBuffer(const std::string& model_buffer, } void RuntimeOption::SetEncryptionKey(const std::string& encryption_key) { - #ifdef ENABLE_ENCRYPTION - encryption_key_ = encryption_key; - #else - FDERROR << "The FastDeploy didn't compile with encryption function." - << std::endl; - #endif +#ifdef ENABLE_ENCRYPTION + encryption_key_ = encryption_key; +#else + FDERROR << "The FastDeploy didn't compile with encryption function." + << std::endl; +#endif } void RuntimeOption::UseGpu(int gpu_id) { diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h index b263f4ad3..a10ed9845 100644 --- a/fastdeploy/runtime/runtime_option.h +++ b/fastdeploy/runtime/runtime_option.h @@ -207,7 +207,7 @@ struct FASTDEPLOY_DECL RuntimeOption { // *** The belowing api are deprecated, will be removed in v1.2.0 // *** Do not use it anymore - void SetPaddleMKLDNN(bool pd_mkldnn = true); + void SetPaddleMKLDNN(bool pd_mkldnn = true); void EnablePaddleToTrt(); void DeletePaddleBackendPass(const std::string& delete_pass_name); void EnablePaddleLogInfo(); diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py index 1d2fc1c1d..a9004a15a 100644 --- a/python/fastdeploy/runtime.py +++ b/python/fastdeploy/runtime.py @@ -187,8 +187,7 @@ class RuntimeOption: return self._option.set_model_buffer(model_buffer, params_buffer, model_format) - def set_encryption_key(self, - encryption_key): + def set_encryption_key(self, encryption_key): """When loading encrypted model, encryption_key is required to decrypte model :param encryption_key: (str)The key for decrypting model """ @@ -590,10 +589,12 @@ class RuntimeOption: replica_num=1, available_memory_proportion=1.0, enable_half_partial=False): - logging.warning("`RuntimeOption.set_ipu_config` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.set_ipu_config()` instead.") - self._option.paddle_infer_option.set_ipu_config(enable_fp16, replica_num, - available_memory_proportion, - enable_half_partial) + logging.warning( + "`RuntimeOption.set_ipu_config` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.set_ipu_config()` instead." 
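+            # (the call still works: it forwards to paddle_infer_option below)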
+ ) + self._option.paddle_infer_option.set_ipu_config( + enable_fp16, replica_num, available_memory_proportion, + enable_half_partial) @property def poros_option(self): @@ -664,7 +665,8 @@ class RuntimeOption: continue if hasattr(getattr(self._option, attr), "__call__"): continue - message += " {} : {}\t\n".format(attr, getattr(self._option, attr)) + message += " {} : {}\t\n".format(attr, + getattr(self._option, attr)) message.strip("\n") message += ")" return message From 47b1d27fbb11e88dd4ccfe13d11e4561c1d9eb4e Mon Sep 17 00:00:00 2001 From: WJJ1995 Date: Mon, 13 Feb 2023 16:12:54 +0800 Subject: [PATCH 22/30] [Benchmark] Add macros for benchmark (#1301) * add GPL lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * add GPL-3.0 lisence * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com> --- benchmark/cpp/benchmark_ppyolov8.cc | 110 ++++------------------------ benchmark/cpp/benchmark_yolov5.cc | 97 ++++-------------------- benchmark/cpp/flags.h | 73 ------------------ benchmark/cpp/macros.h | 70 ++++++++++++++++++ benchmark/cpp/option.h | 92 +++++++++++++++++++++++ 5 files changed, 190 insertions(+), 252 deletions(-) mode change 100755 => 100644 benchmark/cpp/benchmark_yolov5.cc create mode 100755 benchmark/cpp/macros.h create mode 100755 benchmark/cpp/option.h diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc index cff374200..545474635 100644 --- a/benchmark/cpp/benchmark_ppyolov8.cc +++ b/benchmark/cpp/benchmark_ppyolov8.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
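+// The per-model warmup/repeat/profiling boilerplate removed below now lives
+// in the BENCHMARK_MODEL macro (macros.h); CreateRuntimeOption moved from
+// flags.h into option.h.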
-#include "fastdeploy/benchmark/utils.h" -#include "fastdeploy/vision.h" #include "flags.h" +#include "macros.h" +#include "option.h" #ifdef WIN32 const char sep = '\\'; @@ -22,104 +22,24 @@ const char sep = '\\'; const char sep = '/'; #endif -bool RunModel(std::string model_dir, std::string image_file, size_t warmup, - size_t repeats, size_t dump_period, std::string cpu_mem_file_name, - std::string gpu_mem_file_name) { +int main(int argc, char* argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + auto im = cv::imread(FLAGS_image); // Initialization auto option = fastdeploy::RuntimeOption(); if (!CreateRuntimeOption(&option)) { PrintUsage(); return false; } - auto model_file = model_dir + sep + "model.pdmodel"; - auto params_file = model_dir + sep + "model.pdiparams"; - auto config_file = model_dir + sep + "infer_cfg.yml"; - - if (FLAGS_profile_mode == "runtime") { - option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup); - } - auto model = fastdeploy::vision::detection::PaddleYOLOv8( + auto model_file = FLAGS_model + sep + "model.pdmodel"; + auto params_file = FLAGS_model + sep + "model.pdiparams"; + auto config_file = FLAGS_model + sep + "infer_cfg.yml"; + auto model_ppyolov8 = fastdeploy::vision::detection::PaddleYOLOv8( model_file, params_file, config_file, option); - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return false; - } - auto im = cv::imread(image_file); - // For Runtime - if (FLAGS_profile_mode == "runtime") { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - double profile_time = model.GetProfileTime() * 1000; - std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } else { - // For End2End - // Step1: warm up for warmup times - std::cout << "Warmup " << warmup << " times..." << std::endl; - for (int i = 0; i < warmup; i++) { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - } - std::vector end2end_statis; - // Step2: repeat for repeats times - std::cout << "Counting time..." << std::endl; - fastdeploy::TimeCounter tc; - fastdeploy::vision::DetectionResult res; - for (int i = 0; i < repeats; i++) { - if (FLAGS_collect_memory_info && i % dump_period == 0) { - fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name); -#if defined(WITH_GPU) - fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name, - FLAGS_device_id); -#endif - } - tc.Start(); - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - tc.End(); - end2end_statis.push_back(tc.Duration() * 1000); - } - float end2end = std::accumulate(end2end_statis.end() - repeats, - end2end_statis.end(), 0.f) / - repeats; - std::cout << "End2End(ms): " << end2end << "ms." 
<< std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } - - return true; -} - -int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - int repeats = FLAGS_repeat; - int warmup = FLAGS_warmup; - int dump_period = FLAGS_dump_period; - std::string cpu_mem_file_name = "result_cpu.txt"; - std::string gpu_mem_file_name = "result_gpu.txt"; - // Run model - if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period, - cpu_mem_file_name, gpu_mem_file_name) != true) { - exit(1); - } - if (FLAGS_collect_memory_info) { - float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name); - std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl; -#if defined(WITH_GPU) - float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name); - std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl; -#endif - } + fastdeploy::vision::DetectionResult res; + BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res)) + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; return 0; -} +} \ No newline at end of file diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100755 new mode 100644 index 2e5df6b1c..5b2cab855 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -12,96 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "fastdeploy/benchmark/utils.h" -#include "fastdeploy/vision.h" #include "flags.h" +#include "macros.h" +#include "option.h" -bool RunModel(std::string model_file, std::string image_file, size_t warmup, - size_t repeats, size_t sampling_interval) { +int main(int argc, char* argv[]) { + google::ParseCommandLineFlags(&argc, &argv, true); + auto im = cv::imread(FLAGS_image); // Initialization auto option = fastdeploy::RuntimeOption(); if (!CreateRuntimeOption(&option)) { PrintUsage(); return false; } - if (FLAGS_profile_mode == "runtime") { - option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup); - } - auto model = fastdeploy::vision::detection::YOLOv5(model_file, "", option); - if (!model.Initialized()) { - std::cerr << "Failed to initialize." << std::endl; - return false; - } - auto im = cv::imread(image_file); - // For collect memory info - fastdeploy::benchmark::ResourceUsageMonitor resource_moniter( - sampling_interval, FLAGS_device_id); - if (FLAGS_collect_memory_info) { - resource_moniter.Start(); - } - // For Runtime - if (FLAGS_profile_mode == "runtime") { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - double profile_time = model.GetProfileTime() * 1000; - std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } else { - // For End2End - // Step1: warm up for warmup times - std::cout << "Warmup " << warmup << " times..." << std::endl; - for (int i = 0; i < warmup; i++) { - fastdeploy::vision::DetectionResult res; - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." 
<< std::endl; - return false; - } - } - // Step2: repeat for repeats times - std::cout << "Counting time..." << std::endl; - std::cout << "Repeat " << repeats << " times..." << std::endl; - fastdeploy::vision::DetectionResult res; - fastdeploy::TimeCounter tc; - tc.Start(); - for (int i = 0; i < repeats; i++) { - if (!model.Predict(im, &res)) { - std::cerr << "Failed to predict." << std::endl; - return false; - } - } - tc.End(); - double end2end = tc.Duration() / repeats * 1000; - std::cout << "End2End(ms): " << end2end << "ms." << std::endl; - auto vis_im = fastdeploy::vision::VisDetection(im, res); - cv::imwrite("vis_result.jpg", vis_im); - std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; - } - if (FLAGS_collect_memory_info) { - float cpu_mem = resource_moniter.GetMaxCpuMem(); - float gpu_mem = resource_moniter.GetMaxGpuMem(); - float gpu_util = resource_moniter.GetMaxGpuUtil(); - std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl; - std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl; - std::cout << "gpu_util: " << gpu_util << std::endl; - resource_moniter.Stop(); - } - - return true; -} - -int main(int argc, char* argv[]) { - google::ParseCommandLineFlags(&argc, &argv, true); - int repeats = FLAGS_repeat; - int warmup = FLAGS_warmup; - int sampling_interval = FLAGS_sampling_interval; - // Run model - if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) { - exit(1); - } + auto model_yolov5 = + fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option); + fastdeploy::vision::DetectionResult res; + BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res)) + auto vis_im = fastdeploy::vision::VisDetection(im, res); + cv::imwrite("vis_result.jpg", vis_im); + std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl; return 0; -} \ No newline at end of file +} diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h index 64f22c702..7f8c3a29f 100755 --- a/benchmark/cpp/flags.h +++ b/benchmark/cpp/flags.h @@ -15,7 +15,6 @@ #pragma once #include "gflags/gflags.h" -#include "fastdeploy/utils/perf.h" DEFINE_string(model, "", "Directory of the inference model."); DEFINE_string(image, "", "Path of the image file."); @@ -49,75 +48,3 @@ void PrintUsage() { std::cout << "Default value of backend: default" << std::endl; std::cout << "Default value of use_fp16: false" << std::endl; } - -bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { - if (FLAGS_device == "gpu") { - option->UseGpu(FLAGS_device_id); - if (FLAGS_backend == "ort") { - option->UseOrtBackend(); - } else if (FLAGS_backend == "paddle") { - option->UsePaddleInferBackend(); - } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") { - option->UseTrtBackend(); - if (FLAGS_backend == "paddle_trt") { - option->EnablePaddleToTrt(); - } - if (FLAGS_use_fp16) { - option->EnableTrtFP16(); - } - } else if (FLAGS_backend == "default") { - return true; - } else { - std::cout << "While inference with GPU, only support " - "default/ort/paddle/trt/paddle_trt now, " - << FLAGS_backend << " is not supported." 
<< std::endl; - return false; - } - } else if (FLAGS_device == "cpu") { - option->SetCpuThreadNum(FLAGS_cpu_thread_nums); - if (FLAGS_backend == "ort") { - option->UseOrtBackend(); - } else if (FLAGS_backend == "ov") { - option->UseOpenVINOBackend(); - } else if (FLAGS_backend == "paddle") { - option->UsePaddleInferBackend(); - } else if (FLAGS_backend == "lite") { - option->UsePaddleLiteBackend(); - if (FLAGS_use_fp16) { - option->EnableLiteFP16(); - } - } else if (FLAGS_backend == "default") { - return true; - } else { - std::cout << "While inference with CPU, only support " - "default/ort/ov/paddle/lite now, " - << FLAGS_backend << " is not supported." << std::endl; - return false; - } - } else if (FLAGS_device == "xpu") { - option->UseKunlunXin(FLAGS_device_id); - if (FLAGS_backend == "ort") { - option->UseOrtBackend(); - } else if (FLAGS_backend == "paddle") { - option->UsePaddleInferBackend(); - } else if (FLAGS_backend == "lite") { - option->UsePaddleLiteBackend(); - if (FLAGS_use_fp16) { - option->EnableLiteFP16(); - } - } else if (FLAGS_backend == "default") { - return true; - } else { - std::cout << "While inference with XPU, only support " - "default/ort/paddle/lite now, " - << FLAGS_backend << " is not supported." << std::endl; - return false; - } - } else { - std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device - << " is not supported." << std::endl; - return false; - } - - return true; -} diff --git a/benchmark/cpp/macros.h b/benchmark/cpp/macros.h new file mode 100755 index 000000000..bebd26e0d --- /dev/null +++ b/benchmark/cpp/macros.h @@ -0,0 +1,70 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "fastdeploy/benchmark/utils.h" +#include "fastdeploy/utils/perf.h" + +#define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC) \ +{ \ + std::cout << "====" << #MODEL_NAME << "====" << std::endl; \ + if (!MODEL_NAME.Initialized()) { \ + std::cerr << "Failed to initialize." << std::endl; \ + return 0; \ + } \ + auto __im__ = cv::imread(FLAGS_image); \ + fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__( \ + FLAGS_sampling_interval, FLAGS_device_id); \ + if (FLAGS_collect_memory_info) { \ + __resource_moniter__.Start(); \ + } \ + if (FLAGS_profile_mode == "runtime") { \ + if (!BENCHMARK_FUNC) { \ + std::cerr << "Failed to predict." << std::endl; \ + return 0; \ + } \ + double __profile_time__ = MODEL_NAME.GetProfileTime() * 1000; \ + std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \ + } else { \ + std::cout << "Warmup " << FLAGS_warmup << " times..." << std::endl; \ + for (int __i__ = 0; __i__ < FLAGS_warmup; __i__++) { \ + if (!BENCHMARK_FUNC) { \ + std::cerr << "Failed to predict." << std::endl; \ + return 0; \ + } \ + } \ + std::cout << "Counting time..." << std::endl; \ + std::cout << "Repeat " << FLAGS_repeat << " times..." 
<< std::endl; \ + fastdeploy::TimeCounter __tc__; \ + __tc__.Start(); \ + for (int __i__ = 0; __i__ < FLAGS_repeat; __i__++) { \ + if (!BENCHMARK_FUNC) { \ + std::cerr << "Failed to predict." << std::endl; \ + return 0; \ + } \ + } \ + __tc__.End(); \ + double __end2end__ = __tc__.Duration() / FLAGS_repeat * 1000; \ + std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl; \ + } \ + if (FLAGS_collect_memory_info) { \ + float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \ + float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \ + float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \ + std::cout << "cpu_pss_mb: " << __cpu_mem__ << "MB." << std::endl; \ + std::cout << "gpu_pss_mb: " << __gpu_mem__ << "MB." << std::endl; \ + std::cout << "gpu_util: " << __gpu_util__ << std::endl; \ + __resource_moniter__.Stop(); \ + } \ +} diff --git a/benchmark/cpp/option.h b/benchmark/cpp/option.h new file mode 100755 index 000000000..9989255e5 --- /dev/null +++ b/benchmark/cpp/option.h @@ -0,0 +1,92 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "fastdeploy/vision.h" + +static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) { + if (FLAGS_profile_mode == "runtime") { + option->EnableProfiling(FLAGS_include_h2d_d2h, FLAGS_repeat, FLAGS_warmup); + } + if (FLAGS_device == "gpu") { + option->UseGpu(FLAGS_device_id); + if (FLAGS_backend == "ort") { + option->UseOrtBackend(); + } else if (FLAGS_backend == "paddle") { + option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") { + option->UseTrtBackend(); + if (FLAGS_backend == "paddle_trt") { + option->EnablePaddleToTrt(); + } + if (FLAGS_use_fp16) { + option->EnableTrtFP16(); + } + } else if (FLAGS_backend == "default") { + return true; + } else { + std::cout << "While inference with GPU, only support " + "default/ort/paddle/trt/paddle_trt now, " + << FLAGS_backend << " is not supported." << std::endl; + return false; + } + } else if (FLAGS_device == "cpu") { + option->SetCpuThreadNum(FLAGS_cpu_thread_nums); + if (FLAGS_backend == "ort") { + option->UseOrtBackend(); + } else if (FLAGS_backend == "ov") { + option->UseOpenVINOBackend(); + } else if (FLAGS_backend == "paddle") { + option->UsePaddleInferBackend(); + } else if (FLAGS_backend == "lite") { + option->UsePaddleLiteBackend(); + if (FLAGS_use_fp16) { + option->EnableLiteFP16(); + } + } else if (FLAGS_backend == "default") { + return true; + } else { + std::cout << "While inference with CPU, only support " + "default/ort/ov/paddle/lite now, " + << FLAGS_backend << " is not supported." 
<< std::endl;
+      return false;
+    }
+  } else if (FLAGS_device == "xpu") {
+    option->UseKunlunXin(FLAGS_device_id);
+    if (FLAGS_backend == "ort") {
+      option->UseOrtBackend();
+    } else if (FLAGS_backend == "paddle") {
+      option->UsePaddleInferBackend();
+    } else if (FLAGS_backend == "lite") {
+      option->UsePaddleLiteBackend();
+      if (FLAGS_use_fp16) {
+        option->EnableLiteFP16();
+      }
+    } else if (FLAGS_backend == "default") {
+      return true;
+    } else {
+      std::cout << "While inference with XPU, only support "
+                   "default/ort/paddle/lite now, "
+                << FLAGS_backend << " is not supported." << std::endl;
+      return false;
+    }
+  } else {
+    std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device
+              << " is not supported." << std::endl;
+    return false;
+  }
+
+  return true;
+}

From f1ab47a4efdcfcd5ad467565c7881323d22e4175 Mon Sep 17 00:00:00 2001
From: wwbitejotunn
Date: Mon, 13 Feb 2023 18:16:45 +0000
Subject: [PATCH 23/30] code refine

---
 fastdeploy/runtime/backends/paddle/paddle_backend.cc | 5 ++---
 fastdeploy/runtime/runtime.cc                        | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
index 73f0df673..09dbe812a 100644
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -51,7 +51,7 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
       config_.SetOptimCacheDir(opt_cache_dir);
     }
     config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
-                                 option.trt_option.max_batch_size, 20,
+                                 option.trt_option.max_batch_size, 3,
                                  precision, use_static);
     SetTRTDynamicShapeToConfig(option);
   }
@@ -128,10 +128,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
                    "file will save to the directory where paddle model saved."
                 << std::endl;
       use_static = true;
-      config_.SetOptimCacheDir(option.trt_option.serialize_file);
     }
     config_.EnableTensorRtEngine(option.trt_option.max_workspace_size,
-                                 option.trt_option.max_batch_size, 20,
+                                 option.trt_option.max_batch_size, 3,
                                  paddle_infer::PrecisionType::kInt8,
                                  use_static, false);
     SetTRTDynamicShapeToConfig(option);
diff --git a/fastdeploy/runtime/runtime.cc b/fastdeploy/runtime/runtime.cc
index 3f3ccd031..160894a68 100644
--- a/fastdeploy/runtime/runtime.cc
+++ b/fastdeploy/runtime/runtime.cc
@@ -202,7 +202,7 @@ void Runtime::BindOutputTensor(const std::string& name, FDTensor& output) {
   bool is_exist = false;
   for (auto& t : output_tensors_) {
     if (t.name == name) {
-      // FDWARNING << "The output name [" << name << "] is exist." << std::endl;
+      FDINFO << "The output name [" << name << "] already exists." << std::endl;
       is_exist = true;
       t.SetExternalData(output.shape, output.dtype, output.MutableData(),
                         output.device, output.device_id);
@@ -210,8 +210,7 @@ void Runtime::BindOutputTensor(const std::string& name, FDTensor& output) {
   }
   if (!is_exist) {
-    // FDWARNING << "The output name [" << name << "] don't exist." <<
-    // std::endl;
+    FDINFO << "The output name [" << name << "] is prebound and will be added into the output tensor list."
<< std::endl;
     FDTensor new_tensor(name);
     new_tensor.SetExternalData(output.shape, output.dtype,
                                output.MutableData(), output.device,
                                output.device_id);

From 727fdc58636e2fdbfdb64bd88685307c8aad5f27 Mon Sep 17 00:00:00 2001
From: wwbitejotunn
Date: Tue, 14 Feb 2023 06:22:47 +0000
Subject: [PATCH 24/30] reuse setStream c++ interface, add set_raw_stream
 python api

---
 fastdeploy/runtime/option_pybind.cc  |  5 ++++-
 fastdeploy/runtime/runtime_option.cc |  4 ----
 fastdeploy/runtime/runtime_option.h  |  2 --
 python/fastdeploy/runtime.py         | 15 +++++++++++----
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/fastdeploy/runtime/option_pybind.cc b/fastdeploy/runtime/option_pybind.cc
index d5a6187c8..edec738f7 100644
--- a/fastdeploy/runtime/option_pybind.cc
+++ b/fastdeploy/runtime/option_pybind.cc
@@ -48,7 +48,10 @@ void BindOption(pybind11::module& m) {
       .def_readwrite("poros_option", &RuntimeOption::poros_option)
       .def_readwrite("paddle_infer_option", &RuntimeOption::paddle_infer_option)
       .def("set_external_stream", &RuntimeOption::SetExternalStream)
-      .def("set_external_raw_stream", &RuntimeOption::SetExternalRawStream)
+      .def("set_external_raw_stream",
+           [](RuntimeOption& self, size_t external_stream) {
+             self.SetExternalStream(reinterpret_cast<void*>(external_stream));
+           })
       .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
       .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
       .def("use_poros_backend", &RuntimeOption::UsePorosBackend)
diff --git a/fastdeploy/runtime/runtime_option.cc b/fastdeploy/runtime/runtime_option.cc
index 8a57c33f7..c09352d58 100644
--- a/fastdeploy/runtime/runtime_option.cc
+++ b/fastdeploy/runtime/runtime_option.cc
@@ -93,10 +93,6 @@ void RuntimeOption::SetExternalStream(void* external_stream) {
   external_stream_ = external_stream;
 }
 
-void RuntimeOption::SetExternalRawStream(size_t external_stream) {
-  external_stream_ = (void*) external_stream;
-}
-
 void RuntimeOption::SetCpuThreadNum(int thread_num) {
   FDASSERT(thread_num > 0, "The thread_num must be greater than 0.");
   cpu_thread_num = thread_num;
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
index 84cfebc84..eb26cbfaa 100755
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -105,8 +105,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
 
   void SetExternalStream(void* external_stream);
 
-  void SetExternalRawStream(size_t external_stream);
-
   /*
    * @brief Set number of cpu threads while inference on CPU, by default it will decided by the different backends
    */
diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py
index 20b2d8ad9..4562d2f8d 100644
--- a/python/fastdeploy/runtime.py
+++ b/python/fastdeploy/runtime.py
@@ -591,10 +591,12 @@ class RuntimeOption:
                        replica_num=1,
                        available_memory_proportion=1.0,
                        enable_half_partial=False):
-        logging.warning("`RuntimeOption.set_ipu_config` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.set_ipu_config()` instead.")
-        self._option.paddle_infer_option.set_ipu_config(enable_fp16, replica_num,
-                                                        available_memory_proportion,
-                                                        enable_half_partial)
+        logging.warning(
+            "`RuntimeOption.set_ipu_config` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.set_ipu_config()` instead."
+        )
+        self._option.paddle_infer_option.set_ipu_config(
+            enable_fp16, replica_num, available_memory_proportion,
+            enable_half_partial)
 
     @property
     def poros_option(self):
@@ -657,6 +659,11 @@ class RuntimeOption:
         """
         return self._option.disable_profiling()
 
+    def set_external_raw_stream(self, cuda_stream):
+        """Set the external raw stream used by fastdeploy runtime.
+
+        The argument is the integer address of a CUDA stream, which will be
+        reinterpreted as the underlying stream handle.
+        """
+        self._option.set_external_raw_stream(cuda_stream)
+
     def __repr__(self):
         attrs = dir(self._option)
         message = "RuntimeOption(\n"

From abae858f09b411af1ee840117531d1af08c1e237 Mon Sep 17 00:00:00 2001
From: chenjian
Date: Tue, 14 Feb 2023 14:28:23 +0800
Subject: [PATCH 25/30] [C api] Add install command for c api and a demo in
 paddledetection (#1289)

* add c api demo and fix bugs
* fix doc
* add config.h.in
* add config.h.in
* add config.h.in
---
 CMakeLists.txt                                |  10 +
 FastDeploy.cmake.in                           |   3 +
 c_api/CMakeLists.txt                          |   1 +
 c_api/fastdeploy_capi/config.h                |  22 ++
 c_api/fastdeploy_capi/config.h.in             |  22 ++
 c_api/fastdeploy_capi/enum_variables.h        |  71 ++++++
 c_api/fastdeploy_capi/fd_common.h             |  40 ----
 c_api/fastdeploy_capi/fd_type.cc              |  40 ++++
 c_api/fastdeploy_capi/fd_type.h               |  19 +-
 c_api/fastdeploy_capi/runtime_option.cc       |  17 +-
 c_api/fastdeploy_capi/runtime_option.h        |  17 +-
 c_api/fastdeploy_capi/vision.h                |  26 +++
 .../vision/classification/ppcls/model.cc      |   7 +-
 .../vision/detection/ppdet/model.cc           |   7 +-
 c_api/fastdeploy_capi/vision/result.cc        |   6 +-
 c_api/fastdeploy_capi/vision/result.h         |  16 --
 c_api/fastdeploy_capi/vision/visualize.cc     |   6 +-
 .../paddledetection/c/CMakeLists.txt          |  13 ++
 .../detection/paddledetection/c/README.md     | 200 +++++++++++++++++
 .../detection/paddledetection/c/README_CN.md  | 204 ++++++++++++++++++
 .../paddledetection/c/infer_ppyoloe.c         | 124 +++++++++++
 21 files changed, 782 insertions(+), 89 deletions(-)
 create mode 100755 c_api/fastdeploy_capi/config.h
 create mode 100755 c_api/fastdeploy_capi/config.h.in
 create mode 100644 c_api/fastdeploy_capi/enum_variables.h
 create mode 100644 c_api/fastdeploy_capi/fd_type.cc
 create mode 100644 c_api/fastdeploy_capi/vision.h
 create mode 100644 examples/vision/detection/paddledetection/c/CMakeLists.txt
 create mode 100644 examples/vision/detection/paddledetection/c/README.md
 create mode 100644 examples/vision/detection/paddledetection/c/README_CN.md
 create mode 100644 examples/vision/detection/paddledetection/c/infer_ppyoloe.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 51ba10c83..4f85653f2 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -715,6 +715,16 @@ if(WITH_ASCEND)
   )
 endif()
 
+if(WITH_CAPI)
+  install(
+    DIRECTORY ${PROJECT_SOURCE_DIR}/c_api/fastdeploy_capi
+    DESTINATION ${CMAKE_INSTALL_PREFIX}/include
+    FILES_MATCHING
+    PATTERN "*.h"
+    PATTERN "*/types_internal.h" EXCLUDE
+  )
+endif()
+
 include(${PROJECT_SOURCE_DIR}/cmake/config_cpack.cmake)
 
 ############################### Building: FastDeploy Python Wheel #############################
diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
index d622660f4..c79001c28 100644
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -33,6 +33,7 @@ set(ORT_DIRECTORY "@ORT_DIRECTORY@")
 set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
 set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
 set(WITH_KUNLUNXIN @WITH_KUNLUNXIN@)
+set(WITH_CAPI @WITH_CAPI@)
 
 # Whether to use FastDeploy static lib. The default
 # value for this option is determined by the SDK
 # build-time options.
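# A consumer-side sketch (hypothetical CMakeLists.txt) showing how the exported
# WITH_CAPI switch above can be checked before linking a C demo; the target
# name infer_c_demo is illustrative:
#
#   include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
#   if(NOT WITH_CAPI)
#     message(FATAL_ERROR "This FastDeploy SDK was built without the C API.")
#   endif()
#   include_directories(${FASTDEPLOY_INCS})
#   add_executable(infer_c_demo infer_demo.c)
#   target_link_libraries(infer_c_demo ${FASTDEPLOY_LIBS})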
@@ -357,6 +358,7 @@ message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}") message(STATUS " CMAKE_MODULE_PATH : ${CMAKE_MODULE_PATH}") message(STATUS "") message(STATUS " WITH_GPU : ${WITH_GPU}") +message(STATUS " WITH_CAPI : ${WITH_CAPI}") message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}") message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}") message(STATUS " ENABLE_SOPHGO_BACKEND : ${ENABLE_SOPHGO_BACKEND}") @@ -365,6 +367,7 @@ message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}") message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}") message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}") message(STATUS " ENABLE_LITE_BACKEND : ${ENABLE_LITE_BACKEND}") + if(ENABLE_PADDLE_BACKEND) message(STATUS " Paddle Inference version : ${PADDLEINFERENCE_VERSION}") endif() diff --git a/c_api/CMakeLists.txt b/c_api/CMakeLists.txt index 7c7a16626..4f3934165 100644 --- a/c_api/CMakeLists.txt +++ b/c_api/CMakeLists.txt @@ -19,6 +19,7 @@ if(NOT WITH_CAPI) return() endif() +configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/config.h) file(GLOB_RECURSE DEPLOY_CAPI_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/*.cc) if(NOT ENABLE_VISION) file(GLOB_RECURSE DEPLOY_VISION_CAPI_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/vision/*.cc) diff --git a/c_api/fastdeploy_capi/config.h b/c_api/fastdeploy_capi/config.h new file mode 100755 index 000000000..73de04c44 --- /dev/null +++ b/c_api/fastdeploy_capi/config.h @@ -0,0 +1,22 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#ifndef ENABLE_VISION +#define ENABLE_VISION +#endif + +#ifndef ENABLE_TEXT +/* #undef ENABLE_TEXT */ +#endif diff --git a/c_api/fastdeploy_capi/config.h.in b/c_api/fastdeploy_capi/config.h.in new file mode 100755 index 000000000..4b5b0137c --- /dev/null +++ b/c_api/fastdeploy_capi/config.h.in @@ -0,0 +1,22 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
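// A sketch of what CMake's configure_file() generates from the template below,
// assuming a build with vision enabled and text disabled: each #cmakedefine is
// rewritten so the output matches the checked-in config.h shown above, i.e.
//
//   #define ENABLE_VISION
//   /* #undef ENABLE_TEXT */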
+#pragma once
+
+#ifndef ENABLE_VISION
+#cmakedefine ENABLE_VISION
+#endif
+
+#ifndef ENABLE_TEXT
+#cmakedefine ENABLE_TEXT
+#endif
diff --git a/c_api/fastdeploy_capi/enum_variables.h b/c_api/fastdeploy_capi/enum_variables.h
new file mode 100644
index 000000000..05a3d4c9f
--- /dev/null
+++ b/c_api/fastdeploy_capi/enum_variables.h
@@ -0,0 +1,71 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#define FD_ENUM(type) \
+  typedef int32_t type; \
+  enum
+
+FD_ENUM(FD_C_ModelFormat){
+    AUTOREC,      ///< Auto recognize the model format by model file name
+    PADDLE,       ///< Model with paddlepaddle format
+    ONNX,         ///< Model with ONNX format
+    RKNN,         ///< Model with RKNN format
+    TORCHSCRIPT,  ///< Model with TorchScript format
+    SOPHGO,       ///< Model with SOPHGO format
+};
+
+FD_ENUM(FD_C_rknpu2_CpuName){
+    RK356X = 0, /* run on RK356X. */
+    RK3588 = 1, /* default, run on RK3588. */
+    UNDEFINED,
+};
+
+FD_ENUM(FD_C_rknpu2_CoreMask){
+    RKNN_NPU_CORE_AUTO = 0,  //< default, run on NPU core randomly.
+    RKNN_NPU_CORE_0 = 1,     //< run on NPU core 0.
+    RKNN_NPU_CORE_1 = 2,     //< run on NPU core 1.
+    RKNN_NPU_CORE_2 = 4,     //< run on NPU core 2.
+    RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 |
+                        RKNN_NPU_CORE_1,  //< run on NPU core 0 and core 1.
+    RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 |
+                          RKNN_NPU_CORE_2,  //< run on NPU core 0, core 1 and core 2.
+    RKNN_NPU_CORE_UNDEFINED,
+};
+
+FD_ENUM(FD_C_LitePowerMode){
+    LITE_POWER_HIGH = 0,       ///< Use Lite Backend with high power mode
+    LITE_POWER_LOW = 1,        ///< Use Lite Backend with low power mode
+    LITE_POWER_FULL = 2,       ///< Use Lite Backend with full power mode
+    LITE_POWER_NO_BIND = 3,    ///< Use Lite Backend with no bind power mode
+    LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high power mode
+    LITE_POWER_RAND_LOW = 5    ///< Use Lite Backend with rand low power mode
+};
+
+FD_ENUM(FD_C_ResultType){
+    UNKNOWN_RESULT,
+    CLASSIFY,
+    DETECTION,
+    SEGMENTATION,
+    OCR,
+    MOT,
+    FACE_DETECTION,
+    FACE_ALIGNMENT,
+    FACE_RECOGNITION,
+    MATTING,
+    MASK,
+    KEYPOINT_DETECTION,
+    HEADPOSE,
+};
diff --git a/c_api/fastdeploy_capi/fd_common.h b/c_api/fastdeploy_capi/fd_common.h
index 6374cf9b5..18bfb3c91 100644
--- a/c_api/fastdeploy_capi/fd_common.h
+++ b/c_api/fastdeploy_capi/fd_common.h
@@ -58,43 +58,3 @@ typedef int8_t FD_C_Bool;
 
 #define TRUE 1
 #define FALSE 0
-
-#define FD_ENUM(type) \
-  typedef int32_t type; \
-  enum
-
-FD_ENUM(FD_C_ModelFormat){
-    AUTOREC,      ///< Auto recognize the model format by model file name
-    PADDLE,       ///< Model with paddlepaddle format
-    ONNX,         ///< Model with ONNX format
-    RKNN,         ///< Model with RKNN format
-    TORCHSCRIPT,  ///< Model with TorchScript format
-    SOPHGO,       ///< Model with SOPHGO format
-};
-
-FD_ENUM(FD_C_rknpu2_CpuName){
-    RK356X = 0, /* run on RK356X. */
-    RK3588 = 1, /* default,run on RK3588. */
-    UNDEFINED,
-};
-
-FD_ENUM(FD_C_rknpu2_CoreMask){
-    RKNN_NPU_CORE_AUTO = 0,  //< default, run on NPU core randomly.
-    RKNN_NPU_CORE_0 = 1,  //< run on NPU core 0.
-    RKNN_NPU_CORE_1 = 2,  //< run on NPU core 1.
-    RKNN_NPU_CORE_2 = 4,  //< run on NPU core 2.
-    RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 |
-                        RKNN_NPU_CORE_1,  //< run on NPU core 1 and core 2.
-    RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 |
-                          RKNN_NPU_CORE_2,  //< run on NPU core 1 and core 2.
-    RKNN_NPU_CORE_UNDEFINED,
-};
-
-FD_ENUM(FD_C_LitePowerMode){
-    LITE_POWER_HIGH = 0,  ///< Use Lite Backend with high power mode
-    LITE_POWER_LOW = 1,  ///< Use Lite Backend with low power mode
-    LITE_POWER_FULL = 2,  ///< Use Lite Backend with full power mode
-    LITE_POWER_NO_BIND = 3,  ///< Use Lite Backend with no bind power mode
-    LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high mode
-    LITE_POWER_RAND_LOW = 5  ///< Use Lite Backend with rand low power mode
-};
diff --git a/c_api/fastdeploy_capi/fd_type.cc b/c_api/fastdeploy_capi/fd_type.cc
new file mode 100644
index 000000000..34390888a
--- /dev/null
+++ b/c_api/fastdeploy_capi/fd_type.cc
@@ -0,0 +1,40 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy_capi/fd_type.h"
+
+#include <opencv2/opencv.hpp>
+
+#include "fastdeploy_capi/fd_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+FD_C_Mat FD_C_Imread(const char* imgpath) {
+  cv::Mat image = cv::imread(imgpath);
+  return new cv::Mat(image);
+}
+
+FD_C_Bool FD_C_Imwrite(const char* savepath, FD_C_Mat img) {
+  cv::Mat cv_img = *(reinterpret_cast<cv::Mat*>(img));
+  bool result = cv::imwrite(savepath, cv_img);
+  return result;
+}
+
+void FD_C_DestroyMat(FD_C_Mat mat) { delete reinterpret_cast<cv::Mat*>(mat); }
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/c_api/fastdeploy_capi/fd_type.h b/c_api/fastdeploy_capi/fd_type.h
index 75daf9db6..4a3d8cadd 100644
--- a/c_api/fastdeploy_capi/fd_type.h
+++ b/c_api/fastdeploy_capi/fd_type.h
@@ -17,7 +17,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
-#include "fastdeploy_capi/fd_common.h"  // NOLINT
+#include "fastdeploy_capi/enum_variables.h"
+#include "fastdeploy_capi/fd_common.h"
 
 typedef struct FD_C_OneDimArrayUint8 {
   size_t size;
@@ -65,3 +66,19 @@ typedef struct FD_C_TwoDimArrayFloat {
 } FD_C_TwoDimArrayFloat;  // std::vector<std::vector<float>>
 
 typedef void* FD_C_Mat;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_Mat
+FD_C_Imread(const char* imgpath);
+
+FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_Imwrite(const char* savepath,
+                                                     __fd_keep FD_C_Mat);
+
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyMat(__fd_take FD_C_Mat mat);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/c_api/fastdeploy_capi/runtime_option.cc b/c_api/fastdeploy_capi/runtime_option.cc
index 3c9b4022d..4683d468d 100644
--- a/c_api/fastdeploy_capi/runtime_option.cc
+++ b/c_api/fastdeploy_capi/runtime_option.cc
@@ -17,7 +17,9 @@
 #include "fastdeploy/utils/utils.h"
 #include "fastdeploy_capi/types_internal.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
 FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper() {
   FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper =
FD_C_CreateRuntimeOptionWrapper() { return fd_c_runtime_option_wrapper; } -void FD_C_DestroyRuntimeOption( +void FD_C_DestroyRuntimeOptionWrapper( __fd_take FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) { delete fd_c_runtime_option_wrapper; } @@ -404,15 +406,6 @@ void FD_C_RuntimeOptionWrapperUseIpu( batches_per_step); } -void FD_C_RuntimeOptionWrapperSetIpuConfig( - __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, - FD_C_Bool enable_fp16, int replica_num, float available_memory_proportion, - FD_C_Bool enable_half_partial) { - auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper, - fd_c_runtime_option_wrapper); - runtime_option->SetIpuConfig(enable_fp16, replica_num, - available_memory_proportion, - enable_half_partial); +#ifdef __cplusplus } - -} // extern "C" +#endif diff --git a/c_api/fastdeploy_capi/runtime_option.h b/c_api/fastdeploy_capi/runtime_option.h index cfc087473..40d220bbe 100644 --- a/c_api/fastdeploy_capi/runtime_option.h +++ b/c_api/fastdeploy_capi/runtime_option.h @@ -14,9 +14,7 @@ #pragma once -#include - -#include "fastdeploy_capi/fd_common.h" +#include "fastdeploy_capi/fd_type.h" typedef struct FD_C_RuntimeOptionWrapper FD_C_RuntimeOptionWrapper; @@ -499,19 +497,6 @@ FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseIpu( int device_num, int micro_batch_size, FD_C_Bool enable_pipelining, int batches_per_step); -/** \brief Set IPU config. - * - * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object - * \param[in] enable_fp16 enable fp16. - * \param[in] replica_num the number of graph replication. - * \param[in] available_memory_proportion the available memory proportion for matmul/conv. - * \param[in] enable_half_partial enable fp16 partial for matmul, only work with fp16. - */ -FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetIpuConfig( - __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, - FD_C_Bool enable_fp16, int replica_num, float available_memory_proportion, - FD_C_Bool enable_half_partial); - #ifdef __cplusplus } // extern "C" #endif diff --git a/c_api/fastdeploy_capi/vision.h b/c_api/fastdeploy_capi/vision.h new file mode 100644 index 000000000..a27a6c8dd --- /dev/null +++ b/c_api/fastdeploy_capi/vision.h @@ -0,0 +1,26 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
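// A minimal consumer sketch, assuming only this umbrella header; a single
// include reaches the runtime option, model, result and visualization APIs,
// and the generated config.h can be used to verify the build flavor:
//
//   #include "fastdeploy_capi/vision.h"
//   #ifndef ENABLE_VISION
//   #error "This FastDeploy build does not ship the vision C API."
//   #endif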
+#pragma once + +#include "fastdeploy_capi/config.h" + +#ifdef ENABLE_VISION +#include "fastdeploy_capi/vision/classification/ppcls/model.h" +#include "fastdeploy_capi/vision/detection/ppdet/model.h" +#include "fastdeploy_capi/vision/result.h" +#include "fastdeploy_capi/vision/visualize.h" +#endif + +#include "fastdeploy_capi/fd_type.h" +#include "fastdeploy_capi/runtime_option.h" diff --git a/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc b/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc index 3ed62f26a..ad0028612 100644 --- a/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc +++ b/c_api/fastdeploy_capi/vision/classification/ppcls/model.cc @@ -16,7 +16,9 @@ #include "fastdeploy_capi/types_internal.h" +#ifdef __cplusplus extern "C" { +#endif FD_C_PaddleClasModelWrapper* FD_C_CreatePaddleClasModelWrapper( const char* model_file, const char* params_file, const char* config_file, @@ -50,4 +52,7 @@ FD_C_Bool FD_C_PaddleClasModelWrapperPredict( ClassifyResultWrapper, fd_c_classify_result_wrapper); return paddleclas_model->Predict(im, classify_result.get()); } -} \ No newline at end of file + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc b/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc index 17a87ec8b..e7055185f 100644 --- a/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc +++ b/c_api/fastdeploy_capi/vision/detection/ppdet/model.cc @@ -17,7 +17,9 @@ #include "fastdeploy_capi/types_internal.h" #include "fastdeploy_capi/vision/visualize.h" +#ifdef __cplusplus extern "C" { +#endif FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper( const char* model_file, const char* params_file, const char* config_file, @@ -50,4 +52,7 @@ FD_C_Bool FD_C_PPYOLOEWrapperPredict( DetectionResultWrapper, fd_c_detection_result_wrapper); return ppyoloe_model->Predict(im, detection_result.get()); } -} \ No newline at end of file + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/c_api/fastdeploy_capi/vision/result.cc b/c_api/fastdeploy_capi/vision/result.cc index abf52ba69..207ef3950 100644 --- a/c_api/fastdeploy_capi/vision/result.cc +++ b/c_api/fastdeploy_capi/vision/result.cc @@ -17,7 +17,9 @@ #include "fastdeploy/utils/utils.h" #include "fastdeploy_capi/types_internal.h" +#ifdef __cplusplus extern "C" { +#endif // Classification Results @@ -235,4 +237,6 @@ FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapperFromData( return fd_c_detection_result_wrapper; } -} \ No newline at end of file +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/c_api/fastdeploy_capi/vision/result.h b/c_api/fastdeploy_capi/vision/result.h index 9d32052d9..247ec50f6 100644 --- a/c_api/fastdeploy_capi/vision/result.h +++ b/c_api/fastdeploy_capi/vision/result.h @@ -24,22 +24,6 @@ typedef struct FD_C_DetectionResultWrapper FD_C_DetectionResultWrapper; extern "C" { #endif -FD_ENUM(FD_C_ResultType){ - UNKNOWN_RESULT, - CLASSIFY, - DETECTION, - SEGMENTATION, - OCR, - MOT, - FACE_DETECTION, - FACE_ALIGNMENT, - FACE_RECOGNITION, - MATTING, - MASK, - KEYPOINT_DETECTION, - HEADPOSE, -}; - typedef struct FD_C_ClassifyResult { FD_C_OneDimArrayInt32 label_ids; FD_C_OneDimArrayFloat scores; diff --git a/c_api/fastdeploy_capi/vision/visualize.cc b/c_api/fastdeploy_capi/vision/visualize.cc index 9132fe606..6045270bd 100644 --- a/c_api/fastdeploy_capi/vision/visualize.cc +++ b/c_api/fastdeploy_capi/vision/visualize.cc @@ -17,7 +17,9 @@ #include "fastdeploy/vision/visualize/visualize.h" 
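// The #ifdef __cplusplus guards added across this patch follow the usual
// dual-language pattern, sketched here in its canonical form:
//
//   #ifdef __cplusplus
//   extern "C" {  /* compiled as C++: expose unmangled C symbols */
//   #endif
//   /* declarations usable from both C and C++ translation units */
//   #ifdef __cplusplus
//   }  /* end extern "C" */
//   #endif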
 #include "fastdeploy_capi/types_internal.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
 FD_C_Mat FD_C_VisDetection(FD_C_Mat im,
                            FD_C_DetectionResult* fd_c_detection_result,
@@ -32,4 +34,6 @@ FD_C_Mat FD_C_VisDetection(FD_C_Mat im,
                                                  line_size, font_size);
   return new cv::Mat(result);
 }
-}
\ No newline at end of file
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/examples/vision/detection/paddledetection/c/CMakeLists.txt b/examples/vision/detection/paddledetection/c/CMakeLists.txt
new file mode 100644
index 000000000..12a8d9a2c
--- /dev/null
+++ b/examples/vision/detection/paddledetection/c/CMakeLists.txt
@@ -0,0 +1,13 @@
+PROJECT(infer_demo C)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+# Path of the downloaded and extracted FastDeploy SDK
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+# Add the FastDeploy dependency headers
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_ppyoloe_demo ${PROJECT_SOURCE_DIR}/infer_ppyoloe.c)
+target_link_libraries(infer_ppyoloe_demo ${FASTDEPLOY_LIBS})
diff --git a/examples/vision/detection/paddledetection/c/README.md b/examples/vision/detection/paddledetection/c/README.md
new file mode 100644
index 000000000..79a33e51a
--- /dev/null
+++ b/examples/vision/detection/paddledetection/c/README.md
@@ -0,0 +1,200 @@
+English | [简体中文](README_CN.md)
+# PaddleDetection C Deployment Example
+
+This directory provides examples in which `infer_xxx.c` quickly finishes the deployment of PaddleDetection models, including PPYOLOE, on CPU/GPU.
+
+Before deployment, the following two steps require confirmation
+
+- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+- 2. Download the precompiled deployment library and sample code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
+
+Taking inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. FastDeploy version 1.0.4 or above (x.x.x>=1.0.4) is required to support this model.
+
+```bash
+# PPYOLOE is taken as an example for inference deployment
+
+mkdir build
+cd build
+# Download the FastDeploy precompiled library. Users can choose the appropriate version in the `FastDeploy Precompiled Library` mentioned above
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-x.x.x.tgz
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+# Download the PPYOLOE model file and test images
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+
+# CPU inference
+./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 0
+# GPU inference
+./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1
+```
+
+The above command works for Linux or MacOS. For how to use the SDK on Windows, refer to:
+- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/en/faq/use_sdk_on_windows.md)
+
+## PaddleDetection C Interface
+
+### RuntimeOption
+
+```c
+FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper()
+```
+
+> Create a RuntimeOption object, and return a pointer to manipulate it.
+
+>
+> **Return**
+> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): Pointer to manipulate RuntimeOption object.
+
+
+```c
+void FD_C_RuntimeOptionWrapperUseCpu(
+     FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper)
+```
+
+> Enable CPU inference.
+>
+> **Params**
+>
+> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): Pointer to manipulate RuntimeOption object.
+
+```c
+void FD_C_RuntimeOptionWrapperUseGpu(
+    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
+    int gpu_id)
+```
+> Enable GPU inference.
+>
+> **Params**
+>
+> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): Pointer to manipulate RuntimeOption object.
+> * **gpu_id**(int): GPU id
+
+
+### Model
+
+```c
+
+FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper(
+    const char* model_file, const char* params_file, const char* config_file,
+    FD_C_RuntimeOptionWrapper* runtime_option,
+    const FD_C_ModelFormat model_format)
+
+```
+
+> Create a PPYOLOE model object, and return a pointer to manipulate it.
+>
+> **Params**
+>
+> * **model_file**(const char*): Model file path
+> * **params_file**(const char*): Parameter file path
+> * **config_file**(const char*): Configuration file path, which is the deployment yaml file exported by PaddleDetection
+> * **runtime_option**(FD_C_RuntimeOptionWrapper*): Pointer to the RuntimeOption object, representing the backend inference configuration
+> * **model_format**(FD_C_ModelFormat): Model format. Paddle format by default
+>
+> **Return**
+> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): Pointer to manipulate PPYOLOE object.
+
+
+#### Read and write image
+
+```c
+FD_C_Mat FD_C_Imread(const char* imgpath)
+```
+
+> Read an image, and return a pointer to cv::Mat.
+>
+> **Params**
+>
+> * **imgpath**(const char*): image path
+>
+> **Return**
+>
+> * **imgmat**(FD_C_Mat): pointer to cv::Mat object which holds the image.
+
+
+```c
+FD_C_Bool FD_C_Imwrite(const char* savepath, FD_C_Mat img);
+```
+
+> Write an image to a file.
+>
+> **Params**
+>
+> * **savepath**(const char*): save path
+> * **img**(FD_C_Mat): pointer to cv::Mat object
+>
+> **Return**
+>
+> * **result**(FD_C_Bool): bool to indicate success or failure
+
+
+#### Prediction
+
+```c
+FD_C_Bool FD_C_PPYOLOEWrapperPredict(
+    __fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img,
+    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper)
+```
+>
+> Predict an image, and generate the detection result.
+>
+> **Params**
+> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): pointer to manipulate PPYOLOE object
+> * **img**(FD_C_Mat): pointer to cv::Mat object, which can be obtained by the FD_C_Imread interface
+> * **result**(FD_C_DetectionResultWrapper*): Detection result, including the detection boxes and the confidence of each box. Refer to [Vision Model Prediction Result](../../../../../docs/api/vision_results/) for DetectionResult
+
+
+#### Result
+
+```c
+FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapper();
+```
+>
+> Create a DetectionResult object to keep the detection result, and return a pointer to manipulate it.
+>
+> **Return**
+> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): pointer to manipulate DetectionResult object
+
+
+
+```c
+FD_C_DetectionResult* FD_C_DetectionResultWrapperGetData(
+    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper)
+```
+>
+> Get the C DetectionResult structure from FD_C_DetectionResultWrapper, so that its fields can be accessed directly.
+>
+> **Params**
+> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): pointer to manipulate DetectionResult object
+>
+> **Return**
+> * **fd_c_detection_result**(FD_C_DetectionResult*): pointer to C DetectionResult structure
+
+
+
+```c
+FD_C_Mat FD_C_VisDetection(FD_C_Mat im, FD_C_DetectionResult* fd_detection_result,
+                  float score_threshold, int line_size, float font_size);
+```
+>
+> Visualize detection results and return the visualization image.
+>
+> **Params**
+> * **im**(FD_C_Mat): pointer to the input image
+> * **fd_detection_result**(FD_C_DetectionResult*): pointer to C DetectionResult structure
+> * **score_threshold**(float): score threshold
+> * **line_size**(int): line size
+> * **font_size**(float): font size
+>
+> **Return**
+> * **vis_im**(FD_C_Mat): pointer to the visualization image.
+
+
+- [Model Description](../../)
+- [Python Deployment](../python)
+- [Vision Model prediction results](../../../../../docs/api/vision_results/)
+- [How to switch the model inference backend engine](../../../../../docs/en/faq/how_to_change_backend.md)
diff --git a/examples/vision/detection/paddledetection/c/README_CN.md b/examples/vision/detection/paddledetection/c/README_CN.md
new file mode 100644
index 000000000..22cbd311c
--- /dev/null
+++ b/examples/vision/detection/paddledetection/c/README_CN.md
@@ -0,0 +1,204 @@
+[English](README.md) | 简体中文
+# PaddleDetection C 部署示例
+
+本目录下提供`infer_xxx.c`来调用C API快速完成PaddleDetection模型PPYOLOE在CPU/GPU上部署的示例。
+
+在部署前,需确认以下两个步骤
+
+- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+
+以Linux上推理为例,在本目录执行如下命令即可完成编译测试,支持此模型需保证FastDeploy版本1.0.4以上(x.x.x>=1.0.4)
+
+```bash
+# 以ppyoloe为例进行推理部署
+
+mkdir build
+cd build
+# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-x.x.x.tgz
+cmake ..
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x +make -j + +# 下载PPYOLOE模型文件和测试图片 +wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz +wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg +tar xvf ppyoloe_crn_l_300e_coco.tgz + + +# CPU推理 +./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 0 +# GPU推理 +./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1 +``` + +以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: +- [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md) + +如果用户使用华为昇腾NPU部署, 请参考以下方式在部署前初始化部署环境: +- [如何使用华为昇腾NPU部署](../../../../../docs/cn/faq/use_sdk_on_ascend.md) + +## PaddleDetection C API接口 + +### 配置 + +```c +FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper() +``` + +> 创建一个RuntimeOption的配置对象,并且返回操作它的指针。 +> +> **返回** +> +> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): 指向RuntimeOption对象的指针 + + +```c +void FD_C_RuntimeOptionWrapperUseCpu( + FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) +``` + +> 开启CPU推理 +> +> **参数** +> +> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): 指向RuntimeOption对象的指针 + +```c +void FD_C_RuntimeOptionWrapperUseGpu( + FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper, + int gpu_id) +``` +> 开启GPU推理 +> +> **参数** +> +> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): 指向RuntimeOption对象的指针 +> * **gpu_id**(int): 显卡号 + + +### 模型 + +```c + +FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper( + const char* model_file, const char* params_file, const char* config_file, + FD_C_RuntimeOptionWrapper* runtime_option, + const FD_C_ModelFormat model_format) + +``` + +> 创建一个PPYOLOE的模型,并且返回操作它的指针。 +> +> **参数** +> +> * **model_file**(const char*): 模型文件路径 +> * **params_file**(const char*): 参数文件路径 +> * **config_file**(const char*): 配置文件路径,即PaddleDetection导出的部署yaml文件 +> * **runtime_option**(FD_C_RuntimeOptionWrapper*): 指向RuntimeOption的指针,表示后端推理配置 +> * **model_format**(FD_C_ModelFormat): 模型格式 +> +> **返回** +> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): 指向PPYOLOE模型对象的指针 + + +#### 读写图像 + +```c +FD_C_Mat FD_C_Imread(const char* imgpath) +``` + +> 读取一个图像,并且返回cv::Mat的指针。 +> +> **参数** +> +> * **imgpath**(const char*): 图像文件路径 +> +> **返回** +> +> * **imgmat**(FD_C_Mat): 指向图像数据cv::Mat的指针。 + + +```c +FD_C_Bool FD_C_Imwrite(const char* savepath, FD_C_Mat img); +``` + +> 将图像写入文件中。 +> +> **参数** +> +> * **savepath**(const char*): 保存图像的路径 +> * **img**(FD_C_Mat): 指向图像数据的指针 +> +> **返回** +> +> * **result**(FD_C_Bool): 表示操作是否成功 + + +#### Predict函数 + +```c +FD_C_Bool FD_C_PPYOLOEWrapperPredict( + __fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img, + FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) +``` +> +> 模型预测接口,输入图像直接并生成检测结果。 +> +> **参数** +> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): 指向PPYOLOE模型的指针 +> * **img**(FD_C_Mat): 输入图像的指针,指向cv::Mat对象,可以调用FD_C_Imread读取图像获取 +> * **result**(FD_C_DetectionResultWrapper*): 指向检测结果的指针,检测结果包括检测框,各个框的置信度, DetectionResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/) + + +#### Predict结果 + +```c +FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapper(); +``` +> +> 创建一个DetectionResult对象,用来保存推理的结果,并返回所创建的DetectionResult对象的指针。 +> +> **返回** +> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): 指向DetectionResult对象的指针 + + + +```c +FD_C_DetectionResult* FD_C_DetectionResultWrapperGetData( + FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper) +``` +> +> 
从DetectionResult对象中提取纯C结构的DetectionResult结果,并返回结构指针,通过该指针可直接返回结构中的字段。 +> +> **参数** +> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): 指向DetectionResult对象的指针 +> +> **返回** +> * **fd_c_detection_result**(FD_C_DetectionResult*): 指向纯C结构的DetectionResult的指针 + + + +```c +FD_C_Mat FD_C_VisDetection(FD_C_Mat im, FD_C_DetectionResult* fd_detection_result, + float score_threshold, int line_size, float font_size); +``` +> +> 对检测结果进行可视化,返回可视化的图像。 +> +> **参数** +> * **im**(FD_C_Mat): 指向输入图像的指针 +> * **fd_detection_result**(FD_C_DetectionResult*): 指向纯C结构DetectionResult的指针 +> * **score_threshold**(float): 检测阈值 +> * **line_size**(int): 检测框线大小 +> * **font_size**(float): 检测框字体大小 +> +> **返回** +> * **vis_im**(FD_C_Mat): 指向可视化图像的指针 + + +- [模型介绍](../../) +- [Python部署](../python) +- [视觉模型预测结果](../../../../../docs/api/vision_results/) +- [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/examples/vision/detection/paddledetection/c/infer_ppyoloe.c b/examples/vision/detection/paddledetection/c/infer_ppyoloe.c new file mode 100644 index 000000000..6b9cc4369 --- /dev/null +++ b/examples/vision/detection/paddledetection/c/infer_ppyoloe.c @@ -0,0 +1,124 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
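// Expected invocation of this demo, mirroring the usage string printed by
// main() below; the model directory and test image are the ones downloaded in
// the README of this example:
//
//   ./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco ./000000014439.jpg 0  # CPU
//   ./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco ./000000014439.jpg 1  # GPU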
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "fastdeploy_capi/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void CpuInfer(const char* model_dir, const char* image_file) {
+  char model_file[100];
+  char params_file[100];
+  char config_file[100];
+  int max_size = 99;
+  snprintf(model_file, max_size, "%s%c%s", model_dir, sep, "model.pdmodel");
+  snprintf(params_file, max_size, "%s%c%s", model_dir, sep, "model.pdiparams");
+  snprintf(config_file, max_size, "%s%c%s", model_dir, sep, "infer_cfg.yml");
+
+  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
+  FD_C_RuntimeOptionWrapperUseCpu(option);
+
+  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
+      model_file, params_file, config_file, option, PADDLE);
+
+  FD_C_Mat im = FD_C_Imread(image_file);
+
+  FD_C_DetectionResultWrapper* result_wrapper =
+      FD_C_CreateDetectionResultWrapper();
+
+  if (!FD_C_PPYOLOEWrapperPredict(model, im, result_wrapper)) {
+    printf("Failed to predict.\n");
+    return;
+  }
+
+  FD_C_DetectionResult* result =
+      FD_C_DetectionResultWrapperGetData(result_wrapper);
+  FD_C_Mat vis_im = FD_C_VisDetection(im, result, 0.5, 1, 0.5);
+
+  FD_C_Imwrite("vis_result.jpg", vis_im);
+  printf("Visualized result saved in ./vis_result.jpg\n");
+
+  FD_C_DestroyRuntimeOptionWrapper(option);
+  FD_C_DestroyPPYOLOEWrapper(model);
+  FD_C_DestroyDetectionResultWrapper(result_wrapper);
+  FD_C_DestroyDetectionResult(result);
+  FD_C_DestroyMat(im);
+  FD_C_DestroyMat(vis_im);
+}
+
+void GpuInfer(const char* model_dir, const char* image_file) {
+  char model_file[100];
+  char params_file[100];
+  char config_file[100];
+  int max_size = 99;
+  snprintf(model_file, max_size, "%s%c%s", model_dir, sep, "model.pdmodel");
+  snprintf(params_file, max_size, "%s%c%s", model_dir, sep, "model.pdiparams");
+  snprintf(config_file, max_size, "%s%c%s", model_dir, sep, "infer_cfg.yml");
+
+  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
+  FD_C_RuntimeOptionWrapperUseGpu(option, 0);
+
+  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
+      model_file, params_file, config_file, option, PADDLE);
+
+  FD_C_Mat im = FD_C_Imread(image_file);
+
+  FD_C_DetectionResultWrapper* result_wrapper =
+      FD_C_CreateDetectionResultWrapper();
+
+  if (!FD_C_PPYOLOEWrapperPredict(model, im, result_wrapper)) {
+    printf("Failed to predict.\n");
+    return;
+  }
+
+  FD_C_DetectionResult* result =
+      FD_C_DetectionResultWrapperGetData(result_wrapper);
+  FD_C_Mat vis_im = FD_C_VisDetection(im, result, 0.5, 1, 0.5);
+
+  FD_C_Imwrite("vis_result.jpg", vis_im);
+  printf("Visualized result saved in ./vis_result.jpg\n");
+
+  FD_C_DestroyRuntimeOptionWrapper(option);
+  FD_C_DestroyPPYOLOEWrapper(model);
+  FD_C_DestroyDetectionResultWrapper(result_wrapper);
+  FD_C_DestroyDetectionResult(result);
+  FD_C_DestroyMat(im);
+  FD_C_DestroyMat(vis_im);
+}
+
+int main(int argc, char* argv[]) {
+  if (argc < 4) {
+    printf(
+        "Usage: infer_demo path/to/model_dir path/to/image run_option, "
+        "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0"
+        "\n");
+    printf(
+        "The data type of run_option is int, 0: run with cpu; 1: run with gpu"
+        "\n");
+    return -1;
+  }
+
+  if (atoi(argv[3]) == 0) {
+    CpuInfer(argv[1], argv[2]);
+  } else if (atoi(argv[3]) == 1) {
+    GpuInfer(argv[1], argv[2]);
+  }
+  return 0;
+}

From 2dfda1db85fdf9adf131f7fe82af4ac9a76fd9e7 Mon Sep 17 00:00:00 2001
From: WJJ1995
Date: Tue, 14 Feb 2023 15:00:05 +0800
Subject: [PATCH 26/30] [Benchmark] Remove pmap and use mem api (#1309)

* add GPL lisence
* add GPL-3.0 lisence
*
add GPL-3.0 lisence * add GPL-3.0 lisence * support yolov8 * add pybind for yolov8 * add yolov8 readme * add cpp benchmark * add cpu and gpu mem * public part split * add runtime mode * fixed bugs * add cpu_thread_nums * deal with comments * deal with comments * deal with comments * rm useless code * add FASTDEPLOY_DECL * add FASTDEPLOY_DECL * fixed for windows * mv rss to pss * mv rss to pss * Update utils.cc * use thread to collect mem * Add ResourceUsageMonitor * rm useless code * fixed bug * fixed typo * update ResourceUsageMonitor * fixed bug * fixed bug * add note for ResourceUsageMonitor * deal with comments * add macros * deal with comments * deal with comments * deal with comments * re-lint * rm pmap and use mem api * rm pmap and use mem api * add mem api * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * Add PrintBenchmarkInfo func * deal with comments --------- Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com> --- benchmark/cpp/benchmark_ppyolov8.cc | 9 ++--- benchmark/cpp/benchmark_yolov5.cc | 3 +- benchmark/cpp/flags.h | 38 ++++++++++++++++++++ benchmark/cpp/macros.h | 5 ++- fastdeploy/benchmark/utils.cc | 56 +++++++++++------------------ fastdeploy/benchmark/utils.h | 14 +++++--- 6 files changed, 74 insertions(+), 51 deletions(-) mode change 100644 => 100755 benchmark/cpp/benchmark_ppyolov8.cc mode change 100644 => 100755 benchmark/cpp/benchmark_yolov5.cc diff --git a/benchmark/cpp/benchmark_ppyolov8.cc b/benchmark/cpp/benchmark_ppyolov8.cc old mode 100644 new mode 100755 index 545474635..5541696d5 --- a/benchmark/cpp/benchmark_ppyolov8.cc +++ b/benchmark/cpp/benchmark_ppyolov8.cc @@ -12,16 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "flags.h" #include "macros.h" +#include "flags.h" #include "option.h" -#ifdef WIN32 -const char sep = '\\'; -#else -const char sep = '/'; -#endif - int main(int argc, char* argv[]) { google::ParseCommandLineFlags(&argc, &argv, true); auto im = cv::imread(FLAGS_image); @@ -31,6 +25,7 @@ int main(int argc, char* argv[]) { PrintUsage(); return false; } + PrintBenchmarkInfo(); auto model_file = FLAGS_model + sep + "model.pdmodel"; auto params_file = FLAGS_model + sep + "model.pdiparams"; auto config_file = FLAGS_model + sep + "infer_cfg.yml"; diff --git a/benchmark/cpp/benchmark_yolov5.cc b/benchmark/cpp/benchmark_yolov5.cc old mode 100644 new mode 100755 index 5b2cab855..6ab3c5990 --- a/benchmark/cpp/benchmark_yolov5.cc +++ b/benchmark/cpp/benchmark_yolov5.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "flags.h"
 #include "macros.h"
+#include "flags.h"
 #include "option.h"
 
 int main(int argc, char* argv[]) {
@@ -25,6 +25,7 @@ int main(int argc, char* argv[]) {
     PrintUsage();
     return false;
   }
+  PrintBenchmarkInfo();
   auto model_yolov5 =
       fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option);
   fastdeploy::vision::DetectionResult res;
diff --git a/benchmark/cpp/flags.h b/benchmark/cpp/flags.h
index 7f8c3a29f..fd20e685c 100755
--- a/benchmark/cpp/flags.h
+++ b/benchmark/cpp/flags.h
@@ -16,6 +16,12 @@
 
 #include "gflags/gflags.h"
 
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
 DEFINE_string(model, "", "Directory of the inference model.");
 DEFINE_string(image, "", "Path of the image file.");
 DEFINE_string(device, "cpu",
@@ -48,3 +54,35 @@ void PrintUsage() {
   std::cout << "Default value of backend: default" << std::endl;
   std::cout << "Default value of use_fp16: false" << std::endl;
 }
+
+void PrintBenchmarkInfo() {
+  // Get model name
+  std::vector<std::string> model_names;
+  fastdeploy::benchmark::Split(FLAGS_model, model_names, sep);
+  // Save benchmark info
+  std::stringstream ss;
+  ss.precision(3);
+  ss << "\n======= Model Info =======\n";
+  ss << "model_name: " << model_names[model_names.size() - 1] << std::endl;
+  ss << "profile_mode: " << FLAGS_profile_mode << std::endl;
+  if (FLAGS_profile_mode == "runtime") {
+    ss << "include_h2d_d2h: " << FLAGS_include_h2d_d2h << std::endl;
+  }
+  ss << "\n======= Backend Info =======\n";
+  ss << "warmup: " << FLAGS_warmup << std::endl;
+  ss << "repeats: " << FLAGS_repeat << std::endl;
+  ss << "device: " << FLAGS_device << std::endl;
+  if (FLAGS_device == "gpu") {
+    ss << "device_id: " << FLAGS_device_id << std::endl;
+  }
+  ss << "backend: " << FLAGS_backend << std::endl;
+  ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl;
+  ss << "use_fp16: " << FLAGS_use_fp16 << std::endl;
+  ss << "collect_memory_info: " << FLAGS_collect_memory_info << std::endl;
+  if (FLAGS_collect_memory_info) {
+    ss << "sampling_interval: " << std::to_string(FLAGS_sampling_interval)
+       << "ms" << std::endl;
+  }
+  std::cout << ss.str() << std::endl;
+  return;
+}
diff --git a/benchmark/cpp/macros.h b/benchmark/cpp/macros.h
index bebd26e0d..77df0c657 100755
--- a/benchmark/cpp/macros.h
+++ b/benchmark/cpp/macros.h
@@ -18,7 +18,6 @@
 
 #define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC) \
   { \
-    std::cout << "====" << #MODEL_NAME << "====" << std::endl; \
     if (!MODEL_NAME.Initialized()) { \
       std::cerr << "Failed to initialize." << std::endl; \
       return 0; \
@@ -62,8 +61,8 @@
       float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \
       float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \
       float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \
-      std::cout << "cpu_pss_mb: " << __cpu_mem__ << "MB." << std::endl; \
-      std::cout << "gpu_pss_mb: " << __gpu_mem__ << "MB." << std::endl; \
+      std::cout << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl; \
+      std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \
       std::cout << "gpu_util: " << __gpu_util__ << std::endl; \
       __resource_moniter__.Stop(); \
     } \
diff --git a/fastdeploy/benchmark/utils.cc b/fastdeploy/benchmark/utils.cc
index a33db1dc2..8857f10c4 100755
--- a/fastdeploy/benchmark/utils.cc
+++ b/fastdeploy/benchmark/utils.cc
@@ -13,8 +13,8 @@
 // limitations under the License.
 #include <cstdio>
-#if defined(__linux__) || defined(__ANDROID__)
-#include <unistd.h>
+#ifdef __linux__
+#include <sys/resource.h>
 #endif
 
 #include <algorithm>
 
@@ -23,8 +23,7 @@
 namespace fastdeploy {
 namespace benchmark {
 
-// Remove the ch characters at both ends of str
-static std::string strip(const std::string& str, char ch = ' ') {
+std::string Strip(const std::string& str, char ch) {
   int i = 0;
   while (str[i] == ch) {
     i++;
@@ -36,9 +35,8 @@
   return str.substr(i, j + 1 - i);
 }
 
-// Split string
-static void split(const std::string& s, std::vector<std::string>& tokens,
-                  char delim = ' ') {
+void Split(const std::string& s, std::vector<std::string>& tokens,
+           char delim) {
   tokens.clear();
   size_t lastPos = s.find_first_not_of(delim, 0);
   size_t pos = s.find(delim, lastPos);
@@ -54,7 +52,7 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
     : is_supported_(false), sampling_interval_(sampling_interval_ms),
       gpu_id_(gpu_id) {
-#if defined(__linux__) || defined(__ANDROID__)
+#ifdef __linux__
  is_supported_ = true;
 #else
  is_supported_ = false;
@@ -67,7 +65,9 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
 }
 
 void ResourceUsageMonitor::Start() {
-  if (!is_supported_) return;
+  if (!is_supported_) {
+    return;
+  }
   if (check_memory_thd_ != nullptr) {
     FDINFO << "Memory monitoring has already started!" << std::endl;
     return;
   }
@@ -77,20 +77,24 @@
   check_memory_thd_.reset(new std::thread(([this]() {
     // Note we retrieve the memory usage at the very beginning of the thread.
     while (true) {
-      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
-      // get max_cpu_mem
-      std::vector<std::string> cpu_tokens;
-      split(cpu_mem_info, cpu_tokens, ' ');
-      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#ifdef __linux__
+      rusage res;
+      if (getrusage(RUSAGE_SELF, &res) == 0) {
+        max_cpu_mem_ =
+            std::max(max_cpu_mem_, static_cast<float>(res.ru_maxrss / 1024.0));
+      }
+#endif
 #if defined(WITH_GPU)
       std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
       // get max_gpu_mem and max_gpu_util
      std::vector<std::string> gpu_tokens;
-      split(gpu_mem_info, gpu_tokens, ',');
+      Split(gpu_mem_info, gpu_tokens, ',');
       max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
       max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
 #endif
-      if (stop_signal_) break;
+      if (stop_signal_) {
+        break;
+      }
       std::this_thread::sleep_for(
           std::chrono::milliseconds(sampling_interval_));
     }
@@ -121,26 +125,6 @@ void ResourceUsageMonitor::StopInternal() {
   check_memory_thd_.reset(nullptr);
 }
 
-std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
-  std::string result = "";
-#if defined(__linux__) || defined(__ANDROID__)
-  int iPid = static_cast<int>(getpid());
-  std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
-  FILE* pp = popen(command.data(), "r");
-  if (!pp) return "";
-  char tmp[1024];
-
-  while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    result += tmp;
-  }
-  pclose(pp);
-#else
-  FDASSERT(false,
-           "Currently collect cpu memory info only supports Linux and ANDROID.")
-#endif
-  return result;
-}
-
 std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
   std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
diff --git a/fastdeploy/benchmark/utils.h b/fastdeploy/benchmark/utils.h
index f81cb29c1..4037cd09c 100755
--- a/fastdeploy/benchmark/utils.h
+++ b/fastdeploy/benchmark/utils.h
@@ -65,20 +65,26 @@ class FASTDEPLOY_DECL ResourceUsageMonitor {
  private:
  void StopInternal();
-  // Get current cpu memory info
-  std::string GetCurrentCpuMemoryInfo();
   // Get current gpu memory info
   std::string GetCurrentGpuMemoryInfo(int device_id);
 
   bool is_supported_ = false;
   bool stop_signal_ = false;
   const int sampling_interval_;
-  float max_cpu_mem_ = 0.0f;
-  float max_gpu_mem_ = 0.0f;
+  float max_cpu_mem_ = 0.0f;  // MB
+  float max_gpu_mem_ = 0.0f;  // MB
   float max_gpu_util_ = 0.0f;
   const int gpu_id_ = 0;
   std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
 };
 
+// Remove the ch characters at both ends of str
+FASTDEPLOY_DECL std::string Strip(const std::string& str, char ch = ' ');
+
+// Split string
+FASTDEPLOY_DECL void Split(const std::string& s,
+                           std::vector<std::string>& tokens,
+                           char delim = ' ');
+
 }  // namespace benchmark
 }  // namespace fastdeploy

From cd5a54cd75057dfa4877005e0cee513c1b628e06 Mon Sep 17 00:00:00 2001
From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com>
Date: Tue, 14 Feb 2023 15:40:15 +0800
Subject: [PATCH 27/30] [Bug Fix] Fix the bug reported by RK3568 Runtime under
 the latest driver (#1311)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix the error reported by the rkyolo runtime on RK3568 under the latest driver

* Remove debug code
---
 fastdeploy/runtime/backends/rknpu2/rknpu2_backend.cc | 3 ++-
 fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h  | 4 ++--
 fastdeploy/runtime/runtime_option.h                  | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.cc b/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.cc
index dc7bbbad5..a6b9a386f 100644
--- a/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.cc
+++ b/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.cc
@@ -67,6 +67,7 @@ bool RKNPU2Backend::GetSDKAndDeviceVersion() {
  ***************************************************************/
 void RKNPU2Backend::BuildOption(const RKNPU2BackendOption& option) {
   this->option_ = option;
+
   // save cpu_name
   this->option_.cpu_name = option.cpu_name;
 
@@ -123,7 +124,7 @@ bool RKNPU2Backend::InitFromRKNN(const std::string& model_file,
  * @return bool
  * @note  Only support RK3588
  ***************************************************************/
-bool RKNPU2Backend::SetCoreMask(rknpu2::CoreMask& core_mask) const {
+bool RKNPU2Backend::SetCoreMask(const rknpu2::CoreMask& core_mask) {
   int ret = rknn_set_core_mask(ctx, static_cast<rknn_core_mask>(core_mask));
   if (ret != RKNN_SUCC) {
     FDERROR << "rknn_set_core_mask fail! ret=" << ret << std::endl;
diff --git a/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h b/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h
index 8b3aa9fe2..52f174d6d 100644
--- a/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h
+++ b/fastdeploy/runtime/backends/rknpu2/rknpu2_backend.h
@@ -25,7 +25,7 @@
 namespace fastdeploy {
 struct RKNPU2BackendOption {
-  rknpu2::CpuName cpu_name = rknpu2::CpuName::RK3588;
+  rknpu2::CpuName cpu_name = rknpu2::CpuName::RK356X;
 
   // The specification of NPU core setting. It has the following choices :
   // RKNN_NPU_CORE_AUTO : Referring to automatic mode, meaning that it will
@@ -49,7 +49,7 @@ class RKNPU2Backend : public BaseBackend {
 
   bool GetSDKAndDeviceVersion();
 
-  bool SetCoreMask(rknpu2::CoreMask& core_mask) const;
+  bool SetCoreMask(const rknpu2::CoreMask& core_mask);
 
   bool GetModelInputOutputInfos();
 
diff --git a/fastdeploy/runtime/runtime_option.h b/fastdeploy/runtime/runtime_option.h
index a36ac5459..904640eab 100755
--- a/fastdeploy/runtime/runtime_option.h
+++ b/fastdeploy/runtime/runtime_option.h
@@ -71,9 +71,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
   void UseGpu(int gpu_id = 0);
   /// Use RKNPU2 e.g RK3588/RK356X to inference
   void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name =
-                     fastdeploy::rknpu2::CpuName::RK3588,
+                     fastdeploy::rknpu2::CpuName::RK356X,
                  fastdeploy::rknpu2::CoreMask rknpu2_core =
-                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
+                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO);
   /// Use TimVX e.g RV1126/A311D to inference
   void UseTimVX();
   /// Use Huawei Ascend to inference

From fc6edcc541c98aeb28093511d419765780b0d7b9 Mon Sep 17 00:00:00 2001
From: Zheng-Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com>
Date: Tue, 14 Feb 2023 17:34:41 +0800
Subject: [PATCH 28/30] [Model] Update PPDetection RKNPU2 (#1323)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Update docs

* Fix errors in docs

* Update docs

* Update the Python example script and the ppyoloe conversion script
---
 .../paddledetection/rknpu2/README_CN.md       | 81 ++++++++++++++++++-
 .../paddledetection/rknpu2/picodet.md         | 68 ----------------
 .../paddledetection/rknpu2/python/infer.py    |  7 +-
 .../paddledetection/rknpu2/yolov8.md          | 50 ------------
 .../picodet_s_416_coco_lcnet_unquantized.yaml |  2 +-
 ...ppyoloe_plus_crn_s_80e_coco_quantized.yaml | 17 ++++
 6 files changed, 99 insertions(+), 126 deletions(-)
 delete mode 100644 examples/vision/detection/paddledetection/rknpu2/picodet.md
 delete mode 100644 examples/vision/detection/paddledetection/rknpu2/yolov8.md
 create mode 100644 tools/rknpu2/config/ppyoloe_plus_crn_s_80e_coco_quantized.yaml

diff --git a/examples/vision/detection/paddledetection/rknpu2/README_CN.md b/examples/vision/detection/paddledetection/rknpu2/README_CN.md
index 8fb3765ba..6a932fe3a 100644
--- a/examples/vision/detection/paddledetection/rknpu2/README_CN.md
+++ b/examples/vision/detection/paddledetection/rknpu2/README_CN.md
@@ -4,12 +4,14 @@
 
 ## Supported model list
 
-FastDeploy currently supports deploying the following PaddleDetection models with RKNPU2:
+The PaddleDetection models that have been tested on RKNPU2 are as follows:
 
 - Picodet
-- PPYOLOE
+- PPYOLOE(int8)
 - YOLOV8
 
+If you need detailed speed information, see the [RKNPU2 model speed table](../../../../../docs/cn/faq/rknpu2/rknpu2.md)
+
 ## Preparing PaddleDetection deployment models and converting them
 
 Before deploying a model on the RKNPU, the Paddle model needs to be converted into an RKNN model. The specific steps are as follows:
@@ -20,8 +22,79 @@
 
 ## Model conversion examples
 
-- [Picodet RKNPU2 model conversion guide](./picodet.md)
-- [YOLOv8 RKNPU2 model conversion guide](./yolov8.md)
+### Notes
+
+Pay attention to the following points when deploying PPDetection models on RKNPU2:
+
+* The exported model must include the Decode step
+* Since RKNPU2 does not support NMS, the output nodes must be truncated before the NMS node
+* Due to limitations of the RKNPU2 Div operator, the output nodes need to be truncated before the Div operator
+
+### Converting the Paddle model to an ONNX model
+
+Since Rockchip's rknn-toolkit2 tool does not yet support exporting Paddle models directly to RKNN models, the Paddle model must first be exported as an ONNX model, and the ONNX model is then converted into an RKNN model.
+
+```bash
+# Take Picodet as an example
+# Download and extract the Paddle static graph model
+wget https://paddledet.bj.bcebos.com/deploy/Inference/picodet_s_416_coco_lcnet.tar
+tar xvf picodet_s_416_coco_lcnet.tar
+
+# Convert the static graph to an ONNX model. Note: the save_file here should match the archive name
+paddle2onnx --model_dir picodet_s_416_coco_lcnet \
+            --model_filename model.pdmodel \
+            --params_filename model.pdiparams \
+            --save_file picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+            --enable_dev_version True
+
+# Fix the input shape
+python -m paddle2onnx.optimize --input_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+                               --output_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+                               --input_shape_dict "{'image':[1,3,416,416]}"
+```
+
+### Writing the yaml file
+
+**Modify the normalize parameters**
+
+If you need to run the normalize operation on the NPU, configure the normalize parameters according to your model, for example:
+
+```yaml
+mean:
+  -
+    - 123.675
+    - 116.28
+    - 103.53
+std:
+  -
+    - 58.395
+    - 57.12
+    - 57.375
+```
+
+**Modify the outputs parameter**
+The output node names of the converted model vary with the Paddle2ONNX version. Please visualize the model with [Netron](https://netron.app) and locate the NonMaxSuppression node marked by the blue box below; the node names in the red boxes are the target names.
+
+For example, visualizing with Netron gives the following picture:
+
+![](https://user-images.githubusercontent.com/58363586/212599781-e1952da7-6eae-4951-8ca7-bab7e6940692.png)
+
+Locate the NonMaxSuppression node marked by the blue box. The two node names marked by the red boxes are p2o.Div.79 and p2o.Concat.9, so the outputs parameter needs to be modified, as follows:
+
+```yaml
+outputs_nodes:
+  - 'p2o.Mul.179'
+  - 'p2o.Concat.9'
+```
+
+### Converting the ONNX model to an RKNN model
+
+For convenience, we provide a Python script. With our preconfigured config file, you can quickly convert an ONNX model into an RKNN model
+
+```bash
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml \
+                              --target_platform rk3588
+```
 
 ## Other links
 
diff --git a/examples/vision/detection/paddledetection/rknpu2/picodet.md b/examples/vision/detection/paddledetection/rknpu2/picodet.md
deleted file mode 100644
index de0192648..000000000
--- a/examples/vision/detection/paddledetection/rknpu2/picodet.md
+++ /dev/null
@@ -1,68 +0,0 @@
-# Picodet RKNPU2 model conversion guide
-
-All of the following steps are performed on an Ubuntu machine. Please follow the configuration guide to set up the model conversion environment first. Taking Picodet-s as an example, this shows how to convert a PaddleDetection model into an RKNN model.
-
-
-### Export the ONNX model
-
-```bash
-# Download and extract the Paddle static graph model
-wget https://paddledet.bj.bcebos.com/deploy/Inference/picodet_s_416_coco_lcnet.tar
-tar xvf picodet_s_416_coco_lcnet.tar
-
-# Convert the static graph to an ONNX model. Note: the save_file here should match the archive name
-paddle2onnx --model_dir picodet_s_416_coco_lcnet \
-            --model_filename model.pdmodel \
-            --params_filename model.pdiparams \
-            --save_file picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
-            --enable_dev_version True
-
-# Fix the input shape
-python -m paddle2onnx.optimize --input_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
-                               --output_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
-                               --input_shape_dict "{'image':[1,3,416,416]}"
-```
-
-### Write the model export config file
-
-Taking the RK3568 RKNN model as an example, we need to edit tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml to convert the ONNX model into an RKNN model.
-
-**Modify the normalize parameters**
-
-If you need to run the normalize operation on the NPU, configure the normalize parameters according to your model, for example:
-
-```yaml
-mean:
-  -
-    - 127.5
-    - 127.5
-    - 127.5
-std:
-  -
-    - 127.5
-    - 127.5
-    - 127.5
-```
-
-**Modify the outputs parameter**
-The output node names of the converted model vary with the Paddle2ONNX version. Please visualize the model with [Netron](https://netron.app) and locate the NonMaxSuppression node marked by the blue box below; the node names in the red boxes are the target names.
-
-For example, visualizing with Netron gives the following picture:
-
-![](https://user-images.githubusercontent.com/58363586/212599781-e1952da7-6eae-4951-8ca7-bab7e6940692.png)
-
-Locate the NonMaxSuppression node marked by the blue box. The two node names marked by the red boxes are p2o.Div.79 and p2o.Concat.9, so the outputs parameter needs to be modified, as follows:
-
-```yaml
-outputs_nodes: [ 'p2o.Div.79','p2o.Concat.9' ]
-```
-
-### Convert the model
-
-```bash
-
-# Convert the ONNX model to an RKNN model
-# Convert the model; it will be generated in the picodet_s_320_coco_lcnet_non_postprocess directory
-python tools/rknpu2/export.py --config_path tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml \
-                              --target_platform rk3588
-```
diff --git a/examples/vision/detection/paddledetection/rknpu2/python/infer.py b/examples/vision/detection/paddledetection/rknpu2/python/infer.py
index a3c146531..2dfb54281 100644
--- a/examples/vision/detection/paddledetection/rknpu2/python/infer.py
+++ b/examples/vision/detection/paddledetection/rknpu2/python/infer.py
@@ -45,15 +45,16 @@ if __name__ == "__main__":
 
     # Configure the runtime and load the model
     runtime_option = fd.RuntimeOption()
-    runtime_option.use_cpu()
+    runtime_option.use_rknpu2()
 
     model = fd.vision.detection.PPYOLOE(
         model_file,
         params_file,
         config_file,
         runtime_option=runtime_option,
-        model_format=fd.ModelFormat.ONNX)
-
+        model_format=fd.ModelFormat.RKNN)
+    model.preprocessor.disable_normalize()
+    model.preprocessor.disable_permute()
     model.postprocessor.apply_decode_and_nms()
 
     # Predict the detection result for the image
diff --git a/examples/vision/detection/paddledetection/rknpu2/yolov8.md b/examples/vision/detection/paddledetection/rknpu2/yolov8.md
deleted file mode 100644
index 432fe02bb..000000000
--- a/examples/vision/detection/paddledetection/rknpu2/yolov8.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# YOLOv8 RKNPU2 model conversion guide
-
-All of the following steps are performed on an Ubuntu machine. Please follow the configuration guide to set up the model conversion environment first. Taking yolov8 as an example, this shows how to convert a PaddleDetection model into an RKNN model.
-
-
-### Export the ONNX model
-
-```bash
-# Download and extract the Paddle static graph model
-
-# Convert the static graph to an ONNX model. Note: the save_file here should match the archive name
-paddle2onnx --model_dir yolov8_n_500e_coco \
-            --model_filename model.pdmodel \
-            --params_filename model.pdiparams \
-            --save_file yolov8_n_500e_coco/yolov8_n_500e_coco.onnx \
-            --enable_dev_version True
-
-# Fix the input shape
-python -m paddle2onnx.optimize --input_model yolov8_n_500e_coco/yolov8_n_500e_coco.onnx \
-                               --output_model yolov8_n_500e_coco/yolov8_n_500e_coco.onnx \
-                               --input_shape_dict "{'image':[1,3,640,640],'scale_factor':[1,2]}"
-```
-
-### Write the model export config file
-**Modify the outputs parameter**
-The output node names of the converted model vary with the Paddle2ONNX version. Please visualize the model with [Netron](https://netron.app) and locate the NonMaxSuppression node marked by the blue box below; the node names in the red boxes are the target names.
-
-For example, visualizing with Netron gives the following picture:
-
-![](https://user-images.githubusercontent.com/58363586/212599658-8a2c4b79-f59a-40b5-ade7-f77c6fcfdf2a.png)
-
-Locate the NonMaxSuppression node marked by the blue box. The two node names marked by the red boxes are p2o.Div.1 and p2o.Concat.9, so the outputs parameter needs to be modified, as follows:
-
-```yaml
-outputs_nodes: [ 'p2o.Div.1','p2o.Concat.49' ]
-```
-
-### Convert the model
-
-```bash
-
-# Convert the ONNX model to an RKNN model
-# Convert the non-quantized model; it will be generated in the yolov8_n directory
-python tools/rknpu2/export.py --config_path tools/rknpu2/config/yolov8_n_unquantized.yaml \
-                              --target_platform rk3588
-
-# Convert the fully quantized model; it will be generated in the yolov8_n directory
-python tools/rknpu2/export.py --config_path tools/rknpu2/config/yolov8_n_quantized.yaml \
-                              --target_platform rk3588
-```
diff --git a/tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml b/tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml
index aeeb3fdb7..ddcd9c91e 100644
--- a/tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml
+++ b/tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml
@@ -10,7 +10,7 @@ std:
     - 57.375
 model_path: ./picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx
 outputs_nodes:
-  - 'p2o.Div.79'
+  - 'p2o.Mul.179'
   - 'p2o.Concat.9'
 do_quantization: False
 dataset:
diff --git a/tools/rknpu2/config/ppyoloe_plus_crn_s_80e_coco_quantized.yaml b/tools/rknpu2/config/ppyoloe_plus_crn_s_80e_coco_quantized.yaml
new file mode 100644
index 000000000..965e43cde
--- /dev/null
+++ b/tools/rknpu2/config/ppyoloe_plus_crn_s_80e_coco_quantized.yaml
@@ -0,0 +1,17 @@
+mean:
+  -
+    - 0
+    - 0
+    - 0
+std:
+  -
+    - 255
+    - 255
+    - 255
+model_path: ./ppyoloe_plus_crn_s_80e_coco/ppyoloe_plus_crn_s_80e_coco.onnx
+outputs_nodes:
+  - 'p2o.Mul.224'
+  - 'p2o.Concat.29'
+do_quantization: True
+dataset: "./ppyoloe_plus_crn_s_80e_coco/dataset.txt"
+output_folder: "./ppyoloe_plus_crn_s_80e_coco"

From c25d1cc1bce44ef56fc7c86ae9bd7f3f931b9b65 Mon Sep 17 00:00:00 2001
From: WJJ1995
Date: Tue, 14 Feb 2023 17:51:39 +0800
Subject: [PATCH 29/30] [Backend]Fixed enable_paddle_to_trt() bug (#1320)

* add GPL lisence

* add GPL-3.0 lisence

* add GPL-3.0 lisence

* add GPL-3.0 lisence

* support yolov8

* add pybind for yolov8

* add yolov8 readme

* add cpp benchmark

* add cpu and gpu mem

* public part split

* add runtime mode

* fixed bugs

* add cpu_thread_nums

* deal with comments

* deal with comments

* deal with comments

* rm useless code

* add FASTDEPLOY_DECL

* add FASTDEPLOY_DECL

* fixed for windows

* mv rss to pss

* mv rss to pss

* Update utils.cc

* use thread to collect mem

* Add ResourceUsageMonitor

* rm useless code

* fixed bug

* fixed typo

* update ResourceUsageMonitor

* fixed bug

* fixed bug

* add note for ResourceUsageMonitor

* deal with comments

* add macros

* deal with comments

* deal with comments

* deal with comments

* re-lint

* rm pmap and use mem api

* rm pmap and use mem api

* add mem api

* Add PrintBenchmarkInfo func

* Add PrintBenchmarkInfo func

* Add PrintBenchmarkInfo func

* deal with comments

* fixed enable_paddle_to_trt

* add log for paddle_trt

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
---
 fastdeploy/runtime/backends/paddle/paddle_backend.cc | 4 ++++
 python/fastdeploy/runtime.py                         | 5 +++--
 2 files changed, 7 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 fastdeploy/runtime/backends/paddle/paddle_backend.cc
 mode change 100644 => 100755 python/fastdeploy/runtime.py

diff --git a/fastdeploy/runtime/backends/paddle/paddle_backend.cc b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
old mode 100644
new mode 100755
index 09dbe812a..dc804e926
--- a/fastdeploy/runtime/backends/paddle/paddle_backend.cc
+++ b/fastdeploy/runtime/backends/paddle/paddle_backend.cc
@@ -29,6 +29,10 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
     config_.SetExecStream(option_.external_stream_);
   }
   if (option.enable_trt) {
+    if (!option.trt_option.enable_fp16) {
+      FDINFO << "Will try to use tensorrt inference with Paddle Backend."
+             << std::endl;
+    }
     config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
     auto precision = paddle_infer::PrecisionType::kFloat32;
     if (option.trt_option.enable_fp16) {
diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py
old mode 100644
new mode 100755
index 6be764ea3..4980db52e
--- a/python/fastdeploy/runtime.py
+++ b/python/fastdeploy/runtime.py
@@ -532,9 +532,10 @@ class RuntimeOption:
             logging.warning("	option = fd.RuntimeOption()")
             logging.warning("	option.use_gpu(0)")
             logging.warning("	option.use_paddle_infer_backend()")
-            logging.warning("	option.paddle_infer_option.enabel_trt = True")
+            logging.warning("	option.paddle_infer_option.enable_trt = True")
             logging.warning("	==============================================")
-        return self._option.enable_paddle_to_trt()
+        self._option.use_paddle_backend()
+        self._option.paddle_infer_option.enable_trt = True
 
     def set_trt_max_workspace_size(self, trt_max_workspace_size):
        """Set max workspace size while using TensorRT backend.
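Reviewer's note on PATCH 29: after this fix, the deprecated `enable_paddle_to_trt()` helper no longer calls a dedicated binding; it simply selects the Paddle Inference backend and flips `paddle_infer_option.enable_trt`, exactly as its own deprecation warning recommends. For C++ users, the sketch below shows the equivalent configuration. It is a minimal sketch, not code from this patch series: it assumes the C++ `RuntimeOption` exposes the same `paddle_infer_option.enable_trt` field that the Python wrapper binds here, and the model paths are placeholders.

```cpp
// Minimal sketch (not part of this patch): enabling the Paddle-TRT path the
// same way the fixed Python helper does. Assumes the C++ RuntimeOption mirrors
// the fields referenced in the hunks above; model paths are placeholders.
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseGpu(0);           // Paddle-TRT requires a GPU device
  option.UsePaddleBackend();  // select the Paddle Inference backend
  option.paddle_infer_option.enable_trt = true;  // TensorRT subgraph engine
  // With FP16 left off, the backend now logs "Will try to use tensorrt
  // inference with Paddle Backend." (see the paddle_backend.cc hunk above).
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // placeholders
  fastdeploy::Runtime runtime;
  return runtime.Init(option) ? 0 : -1;
}
```

Routing the deprecated helper through the public option fields keeps backend selection on a single code path, which is presumably why the dedicated `enable_paddle_to_trt` binding could be dropped.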
From a5d23c57d0cb6bd52fd2bc3daa4a9d3c2274c403 Mon Sep 17 00:00:00 2001
From: CoolCola <49013063+CoolKbh@users.noreply.github.com>
Date: Tue, 14 Feb 2023 18:36:28 +0800
Subject: [PATCH 30/30] [Bug fix]add yolov7face landmarks (#1297)

* add yolov7face benchmark

* fix review problem

* fix review problems
---
 .../contrib/yolov7face/postprocessor.cc      | 27 ++++++++++++++++---
 .../contrib/yolov7face/postprocessor.h       | 12 +++++++--
 .../contrib/yolov7face/yolov7face_pybind.cc  |  3 ++-
 .../vision/facedet/contrib/yolov7face.py     | 12 +++++++++
 4 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.cc b/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.cc
index 30bb523cc..624aa3403 100644
--- a/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.cc
+++ b/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.cc
@@ -24,7 +24,7 @@ namespace facedet {
 Yolov7FacePostprocessor::Yolov7FacePostprocessor() {
   conf_threshold_ = 0.5;
   nms_threshold_ = 0.45;
-  max_wh_ = 7680.0;
+  landmarks_per_face_ = 5;
 }
 
 bool Yolov7FacePostprocessor::Run(const std::vector<FDTensor>& infer_result,
@@ -36,6 +36,8 @@ bool Yolov7FacePostprocessor::Run(const std::vector<FDTensor>& infer_result,
 
   for (size_t bs = 0; bs < batch; ++bs) {
     (*results)[bs].Clear();
+    // landmarks_per_face must be set before calling Reserve
+    (*results)[bs].landmarks_per_face = landmarks_per_face_;
     (*results)[bs].Reserve(infer_result[0].shape[1]);
     if (infer_result[0].dtype != FDDataType::FP32) {
       FDERROR << "Only support post process with float32 data." << std::endl;
@@ -61,6 +63,15 @@ bool Yolov7FacePostprocessor::Run(const std::vector<FDTensor>& infer_result,
       (*results)[bs].boxes.emplace_back(std::array<float, 4>{
          (x - w / 2.f), (y - h / 2.f), (x + w / 2.f), (y + h / 2.f)});
       (*results)[bs].scores.push_back(confidence);
+
+      // decode landmarks (default 5 landmarks)
+      if (landmarks_per_face_ > 0) {
+        float* landmarks_ptr = const_cast<float*>(reg_cls_ptr + 6);
+        for (size_t j = 0; j < landmarks_per_face_ * 3; j += 3) {
+          (*results)[bs].landmarks.emplace_back(
+              std::array<float, 2>{landmarks_ptr[j], landmarks_ptr[j + 1]});
+        }
+      }
     }
 
     if ((*results)[bs].boxes.size() == 0) {
@@ -79,9 +90,9 @@ bool Yolov7FacePostprocessor::Run(const std::vector<FDTensor>& infer_result,
     float ipt_h = iter_ipt->second[0];
     float ipt_w = iter_ipt->second[1];
     float scale = std::min(out_h / ipt_h, out_w / ipt_w);
+    float pad_h = (out_h - ipt_h * scale) / 2;
+    float pad_w = (out_w - ipt_w * scale) / 2;
     for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) {
-      float pad_h = (out_h - ipt_h * scale) / 2;
-      float pad_w = (out_w - ipt_w * scale) / 2;
       // clip box
       (*results)[bs].boxes[i][0] = std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f);
       (*results)[bs].boxes[i][1] = std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f);
@@ -92,6 +103,16 @@ bool Yolov7FacePostprocessor::Run(const std::vector<FDTensor>& infer_result,
       (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w - 1.0f);
       (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h - 1.0f);
     }
+
+    // scale and clip landmarks
+    for (size_t i = 0; i < (*results)[bs].landmarks.size(); ++i) {
+      (*results)[bs].landmarks[i][0] =
+          std::max(((*results)[bs].landmarks[i][0] - pad_w) / scale, 0.0f);
+      (*results)[bs].landmarks[i][1] =
+          std::max(((*results)[bs].landmarks[i][1] - pad_h) / scale, 0.0f);
+      (*results)[bs].landmarks[i][0] = std::min((*results)[bs].landmarks[i][0], ipt_w - 1.0f);
+      (*results)[bs].landmarks[i][1] = std::min((*results)[bs].landmarks[i][1], ipt_h - 1.0f);
+    }
   }
   return true;
 }
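Reviewer's note on the decode hunk above: each candidate row is decoded as a center-format box plus confidence fields, followed by `landmarks_per_face_` (x, y, conf) triplets starting at `reg_cls_ptr + 6`, of which only x and y are kept. The standalone sketch below replays that indexing on fabricated data so the layout is easy to verify; the interpretation of fields 4 and 5 as object and class confidence is an assumption here, and all numbers are invented.

```cpp
// Standalone sketch (not FastDeploy code): decoding one hypothetical YOLOv7Face
// output row laid out as [cx, cy, w, h, obj_conf, cls_conf, 5 x (x, y, conf)].
#include <array>
#include <cstdio>
#include <vector>

int main() {
  const int landmarks_per_face = 5;  // matches the new default in the patch
  std::vector<float> row = {320.f, 240.f, 100.f, 120.f, 0.9f,  0.95f,
                            300.f, 220.f, 0.9f,  340.f, 220.f, 0.9f,
                            320.f, 240.f, 0.9f,  305.f, 265.f, 0.9f,
                            335.f, 265.f, 0.9f};
  const float* p = row.data();
  // cx/cy/w/h -> corner-format box, as in the hunk above
  std::array<float, 4> box = {p[0] - p[2] / 2.f, p[1] - p[3] / 2.f,
                              p[0] + p[2] / 2.f, p[1] + p[3] / 2.f};
  // Landmarks start at offset 6; every third value (per-point conf) is dropped
  std::vector<std::array<float, 2>> landmarks;
  const float* lm = p + 6;
  for (int j = 0; j < landmarks_per_face * 3; j += 3) {
    landmarks.push_back({lm[j], lm[j + 1]});
  }
  std::printf("box=[%.1f %.1f %.1f %.1f], %zu landmarks, first=(%.1f, %.1f)\n",
              box[0], box[1], box[2], box[3], landmarks.size(),
              landmarks[0][0], landmarks[0][1]);
  return 0;
}
```

Hoisting `pad_h`/`pad_w` out of the clipping loop, as the patch does, also lets the new landmark loop reuse the same letterbox offsets when mapping coordinates back to the original image.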
diff --git a/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.h b/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.h
index 4a1b0d852..f6a6bb23c 100644
--- a/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.h
+++ b/fastdeploy/vision/facedet/contrib/yolov7face/postprocessor.h
@@ -56,11 +56,19 @@ class FASTDEPLOY_DECL Yolov7FacePostprocessor{
   /// Get nms_threshold, default 0.45
   float GetNMSThreshold() const { return nms_threshold_; }
 
+  /// Set landmarks_per_face, default 5
+  void SetLandmarksPerFace(const int& landmarks_per_face) {
+    landmarks_per_face_ = landmarks_per_face;
+  }
+
+  /// Get landmarks_per_face, default 5
+  int GetLandmarksPerFace() const { return landmarks_per_face_; }
+
 protected:
   float conf_threshold_;
   float nms_threshold_;
-  bool multi_label_;
-  float max_wh_;
+  int landmarks_per_face_;
 };
 
 }  // namespace facedet
diff --git a/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc b/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc
index c0c99d425..07f5feece 100644
--- a/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc
+++ b/fastdeploy/vision/facedet/contrib/yolov7face/yolov7face_pybind.cc
@@ -60,7 +60,8 @@ void BindYOLOv7Face(pybind11::module& m) {
             return results;
           })
       .def_property("conf_threshold", &vision::facedet::Yolov7FacePostprocessor::GetConfThreshold, &vision::facedet::Yolov7FacePostprocessor::SetConfThreshold)
-      .def_property("nms_threshold", &vision::facedet::Yolov7FacePostprocessor::GetNMSThreshold, &vision::facedet::Yolov7FacePostprocessor::SetNMSThreshold);
+      .def_property("nms_threshold", &vision::facedet::Yolov7FacePostprocessor::GetNMSThreshold, &vision::facedet::Yolov7FacePostprocessor::SetNMSThreshold)
+      .def_property("landmarks_per_face", &vision::facedet::Yolov7FacePostprocessor::GetLandmarksPerFace, &vision::facedet::Yolov7FacePostprocessor::SetLandmarksPerFace);
 
   pybind11::class_<vision::facedet::YOLOv7Face, FastDeployModel>(m, "YOLOv7Face")
       .def(pybind11::init