Merge branch 'develop' of https://github.com/PaddlePaddle/FastDeploy into develop

2025-10-28 18:51:58 +08:00 · 2022-11-29 10:27:43 +08:00
parent 7761bbed4f 350ac1e8b5
commit 80a23e6072
60 changed files with 316 additions and 207 deletions
--- a/README_CN.md
+++ b/README_CN.md
@@ -11,7 +11,6 @@
    <a href=""><img src="https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-pink.svg"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/graphs/contributors"><img src="https://img.shields.io/github/contributors/PaddlePaddle/FastDeploy?color=9ea"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/commits"><img src="https://img.shields.io/github/commit-activity/m/PaddlePaddle/FastDeploy?color=3af"></a>
-    <a href="https://pypi.org/project/FastDeploy-python/"><img src="https://img.shields.io/pypi/dm/FastDeploy-python?color=9cf"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/issues"><img src="https://img.shields.io/github/issues/PaddlePaddle/FastDeploy?color=9cc"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/stargazers"><img src="https://img.shields.io/github/stars/PaddlePaddle/FastDeploy?color=ccf"></a>
 </p>
--- a/README_EN.md
+++ b/README_EN.md
@@ -11,7 +11,6 @@ English | [简体中文](README_CN.md)
    <a href=""><img src="https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-pink.svg"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/graphs/contributors"><img src="https://img.shields.io/github/contributors/PaddlePaddle/FastDeploy?color=9ea"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/commits"><img src="https://img.shields.io/github/commit-activity/m/PaddlePaddle/FastDeploy?color=3af"></a>
-    <a href="https://pypi.org/project/FastDeploy-python/"><img src="https://img.shields.io/pypi/dm/FastDeploy-python?color=9cf"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/issues"><img src="https://img.shields.io/github/issues/PaddlePaddle/FastDeploy?color=9cc"></a>
    <a href="https://github.com/PaddlePaddle/FastDeploy/stargazers"><img src="https://img.shields.io/github/stars/PaddlePaddle/FastDeploy?color=ccf"></a>
 </p>
--- a/examples/multimodal/stable_diffusion/infer.py
+++ b/examples/multimodal/stable_diffusion/infer.py
@@ -120,7 +120,7 @@ def create_paddle_inference_runtime(model_dir,
                                    use_fp16=False,
                                    device_id=0):
    option = fd.RuntimeOption()
-    option.use_paddle_backend()
+    option.use_paddle_infer_backend()
    if device_id == -1:
        option.use_cpu()
    else:
--- a/examples/runtime/cpp/infer_paddle_paddle_inference.cc
+++ b/examples/runtime/cpp/infer_paddle_paddle_inference.cc
@@ -24,7 +24,7 @@ int main(int argc, char* argv[]) {
  fd::RuntimeOption runtime_option;
  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
  // CPU
-  runtime_option.UsePaddleBackend();
+  runtime_option.UsePaddleInferBackend();
  runtime_option.SetCpuThreadNum(12);
  // GPU
  // runtime_option.UseGpu(0);
--- a/examples/runtime/python/infer_paddle_paddle_inference.py
+++ b/examples/runtime/python/infer_paddle_paddle_inference.py
@@ -26,7 +26,7 @@ option.set_model_path("mobilenetv2/inference.pdmodel",

 # **** CPU 配置 ****
 option.use_cpu()
-option.use_paddle_backend()
+option.use_paddle_infer_backend()
 option.set_cpu_thread_num(12)

 # **** GPU 配置 ***
--- a/examples/text/ernie-3.0/cpp/seq_cls_infer.cc
+++ b/examples/text/ernie-3.0/cpp/seq_cls_infer.cc
@@ -69,7 +69,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
  if (FLAGS_backend == "onnx_runtime") {
    option->UseOrtBackend();
  } else if (FLAGS_backend == "paddle") {
-    option->UsePaddleBackend();
+    option->UsePaddleInferBackend();
  } else if (FLAGS_backend == "openvino") {
    option->UseOpenVINOBackend();
  } else if (FLAGS_backend == "tensorrt" ||
--- a/examples/text/ernie-3.0/python/seq_cls_infer.py
+++ b/examples/text/ernie-3.0/python/seq_cls_infer.py
@@ -99,7 +99,7 @@ class ErnieForSequenceClassificationPredictor(object):
        else:
            option.use_gpu()
        if args.backend == 'paddle':
-            option.use_paddle_backend()
+            option.use_paddle_infer_backend()
        elif args.backend == 'onnx_runtime':
            option.use_ort_backend()
        elif args.backend == 'openvino':
--- a/examples/text/uie/cpp/infer.cc
+++ b/examples/text/uie/cpp/infer.cc
@@ -50,7 +50,7 @@ int main(int argc, char* argv[]) {
  }
  switch (backend_type) {
    case 0:
-      option.UsePaddleBackend();
+      option.UsePaddleInferBackend();
      break;
    case 1:
      option.UseOrtBackend();
--- a/examples/text/uie/python/infer.py
+++ b/examples/text/uie/python/infer.py
@@ -57,7 +57,7 @@ def build_option(args):
    if args.backend == 'onnx_runtime':
        runtime_option.use_ort_backend()
    elif args.backend == 'paddle_inference':
-        runtime_option.use_paddle_backend()
+        runtime_option.use_paddle_infer_backend()
    elif args.backend == 'openvino':
        runtime_option.use_openvino_backend()
    runtime_option.set_cpu_thread_num(args.cpu_num_threads)
--- a/examples/text/uie/serving/models/uie/1/model.py
+++ b/examples/text/uie/serving/models/uie/1/model.py
@@ -78,7 +78,7 @@ class TritonPythonModel:

        for option in options:
            if option['name'] == 'paddle':
-                runtime_option.use_paddle_backend()
+                runtime_option.use_paddle_infer_backend()
            elif option['name'] == 'onnxruntime':
                runtime_option.use_ort_backend()
            elif option['name'] == 'openvino':
--- a/examples/vision/classification/paddleclas/cpp/infer.cc
+++ b/examples/vision/classification/paddleclas/cpp/infer.cc
@@ -36,7 +36,7 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto im = cv::imread(image_file);

  fastdeploy::vision::ClassifyResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }
@@ -61,7 +61,7 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto im = cv::imread(image_file);

  fastdeploy::vision::ClassifyResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }
@@ -87,7 +87,7 @@ void IpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto im = cv::imread(image_file);

  fastdeploy::vision::ClassifyResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }
@@ -113,7 +113,7 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) {
  auto im = cv::imread(image_file);

  fastdeploy::vision::ClassifyResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }
--- a/examples/vision/classification/paddleclas/python/infer.py
+++ b/examples/vision/classification/paddleclas/python/infer.py
@@ -53,5 +53,5 @@ model = fd.vision.classification.PaddleClasModel(

 # 预测图片分类结果
 im = cv2.imread(args.image)
-result = model.predict(im.copy(), args.topk)
+result = model.predict(im, args.topk)
 print(result)
--- a/examples/vision/classification/paddleclas/quantize/cpp/infer.cc
+++ b/examples/vision/classification/paddleclas/quantize/cpp/infer.cc
@@ -31,10 +31,9 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file,
  assert(model.Initialized());

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::ClassifyResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }
--- a/examples/vision/classification/paddleclas/quantize/cpp/ocr.sh
+++ b/examples/vision/classification/paddleclas/quantize/cpp/ocr.sh
@@ -1,10 +0,0 @@
-rm -rf build
-mkdir build
-
-cd build
-
-#/xieyunyao/project/FastDeploy
-
-cmake .. -DFASTDEPLOY_INSTALL_DIR=/xieyunyao/project/FastDeploy
-
-make -j
--- a/examples/vision/classification/paddleclas/quantize/python/infer.py
+++ b/examples/vision/classification/paddleclas/quantize/python/infer.py
@@ -56,7 +56,7 @@ def build_option(args):
    elif args.backend.lower() == "ort":
        option.use_ort_backend()
    elif args.backend.lower() == "paddle":
-        option.use_paddle_backend()
+        option.use_paddle_infer_backend()
    elif args.backend.lower() == "openvino":
        assert args.device.lower(
        ) == "cpu", "OpenVINO backend require inference on device CPU."
@@ -78,5 +78,5 @@ model = fd.vision.classification.PaddleClasModel(

 # 预测图片检测结果
 im = cv2.imread(args.image)
-result = model.predict(im.copy())
+result = model.predict(im)
 print(result)
--- a/examples/vision/classification/paddleclas/rk1126/cpp/infer.cc
+++ b/examples/vision/classification/paddleclas/rk1126/cpp/infer.cc
@@ -35,7 +35,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file) {
  auto im = cv::imread(image_file);

  fastdeploy::vision::ClassifyResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }
--- a/examples/vision/detection/paddledetection/cpp/infer_faster_rcnn.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_faster_rcnn.cc
@@ -34,16 +34,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
@@ -63,16 +62,15 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_mask_rcnn.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_mask_rcnn.cc
@@ -34,16 +34,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
@@ -63,16 +62,15 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_picodet.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_picodet.cc
@@ -34,16 +34,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
@@ -63,16 +62,15 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
@@ -93,16 +91,15 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_ppyolo.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_ppyolo.cc
@@ -34,16 +34,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
@@ -63,16 +62,15 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc
@@ -36,7 +36,7 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto im = cv::imread(image_file);

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }
@@ -64,7 +64,7 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto im = cv::imread(image_file);

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }
@@ -93,7 +93,7 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) {
  auto im = cv::imread(image_file);

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }
--- a/examples/vision/detection/paddledetection/cpp/infer_ssd.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_ssd.cc
@@ -35,16 +35,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
@@ -64,16 +63,15 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_yolov3.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_yolov3.cc
@@ -34,16 +34,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
@@ -63,16 +62,15 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_yolox.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_yolox.cc
@@ -34,16 +34,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
@@ -63,16 +62,15 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
@@ -93,16 +91,15 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
--- a/examples/vision/detection/paddledetection/python/infer_faster_rcnn.py
+++ b/examples/vision/detection/paddledetection/python/infer_faster_rcnn.py
@@ -61,7 +61,7 @@ if args.image is None:
 else:
    image = args.image
 im = cv2.imread(image)
-result = model.predict(im.copy())
+result = model.predict(im)
 print(result)

 # 预测结果可视化
--- a/examples/vision/detection/paddledetection/python/infer_mask_rcnn.py
+++ b/examples/vision/detection/paddledetection/python/infer_mask_rcnn.py
@@ -68,7 +68,7 @@ if args.image is None:
 else:
    image = args.image
 im = cv2.imread(image)
-result = model.predict(im.copy())
+result = model.predict(im)
 print(result)

 # 预测结果可视化
--- a/examples/vision/detection/paddledetection/python/infer_picodet.py
+++ b/examples/vision/detection/paddledetection/python/infer_picodet.py
@@ -59,7 +59,7 @@ if args.image is None:
 else:
    image = args.image
 im = cv2.imread(image)
-result = model.predict(im.copy())
+result = model.predict(im)
 print(result)

 # 预测结果可视化
--- a/examples/vision/detection/paddledetection/python/infer_ppyolo.py
+++ b/examples/vision/detection/paddledetection/python/infer_ppyolo.py
@@ -61,7 +61,7 @@ if args.image is None:
 else:
    image = args.image
 im = cv2.imread(image)
-result = model.predict(im.copy())
+result = model.predict(im)
 print(result)

 # 预测结果可视化
--- a/examples/vision/detection/paddledetection/python/infer_ppyoloe.py
+++ b/examples/vision/detection/paddledetection/python/infer_ppyoloe.py
@@ -60,7 +60,7 @@ if args.image is None:
 else:
    image = args.image
 im = cv2.imread(image)
-result = model.predict(im.copy())
+result = model.predict(im)
 print(result)

 # 预测结果可视化
--- a/examples/vision/detection/paddledetection/python/infer_ssd.py
+++ b/examples/vision/detection/paddledetection/python/infer_ssd.py
@@ -41,7 +41,7 @@ model = fd.vision.detection.SSD(

 # 预测图片检测结果
 im = cv2.imread(args.image)
-result = model.predict(im.copy())
+result = model.predict(im)
 print(result)

 # 预测结果可视化
--- a/examples/vision/detection/paddledetection/python/infer_yolov3.py
+++ b/examples/vision/detection/paddledetection/python/infer_yolov3.py
@@ -59,7 +59,7 @@ if args.image is None:
 else:
    image = args.image
 im = cv2.imread(image)
-result = model.predict(im.copy())
+result = model.predict(im)
 print(result)

 # 预测结果可视化
--- a/examples/vision/detection/paddledetection/python/infer_yolox.py
+++ b/examples/vision/detection/paddledetection/python/infer_yolox.py
@@ -59,7 +59,7 @@ if args.image is None:
 else:
    image = args.image
 im = cv2.imread(image)
-result = model.predict(im.copy())
+result = model.predict(im)
 print(result)

 # 预测结果可视化
--- a/examples/vision/detection/paddledetection/quantize/cpp/infer_ppyoloe.cc
+++ b/examples/vision/detection/paddledetection/quantize/cpp/infer_ppyoloe.cc
@@ -30,17 +30,16 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file,
  assert(model.Initialized());

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;

-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;

@@ -78,7 +77,7 @@ int main(int argc, char* argv[]) {
  }
    else if (flag == 3) {
    option.UseCpu();
-    option.UsePaddleBackend();
+    option.UsePaddleInferBackend();
    }

  std::string model_dir = argv[1];
--- a/examples/vision/detection/paddledetection/quantize/python/infer_ppyoloe.py
+++ b/examples/vision/detection/paddledetection/quantize/python/infer_ppyoloe.py
@@ -57,7 +57,7 @@ def build_option(args):
    elif args.backend.lower() == "ort":
        option.use_ort_backend()
    elif args.backend.lower() == "paddle":
-        option.use_paddle_backend()
+        option.use_paddle_infer_backend()
    elif args.backend.lower() == "openvino":
        assert args.device.lower(
        ) == "cpu", "OpenVINO backend require inference on device CPU."
@@ -78,7 +78,7 @@ model = fd.vision.detection.PPYOLOE(

 # 预测图片检测结果
 im = cv2.imread(args.image)
-result = model.predict(im.copy())
+result = model.predict(im)
 print(result)

 # 预测结果可视化
--- a/examples/vision/detection/paddledetection/rknpu2/python/infer.py
+++ b/examples/vision/detection/paddledetection/rknpu2/python/infer.py
@@ -50,7 +50,7 @@ if __name__ == "__main__":

    # 预测图片分割结果
    im = cv2.imread(args.image)
-    result = model.predict(im.copy())
+    result = model.predict(im)
    print(result)

    # 可视化结果
--- a/examples/vision/facealign/face_landmark_1000/cpp/infer.cc
+++ b/examples/vision/facealign/face_landmark_1000/cpp/infer.cc
@@ -40,7 +40,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
    if (FLAGS_backend == "ort") {
      option->UseOrtBackend();
    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleBackend();
+      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "trt" || 
               FLAGS_backend == "paddle_trt") {
      option->UseTrtBackend();
@@ -63,7 +63,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
    } else if (FLAGS_backend == "ov") {
      option->UseOpenVINOBackend();
    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleBackend();
+      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "default") {
      return true;
    } else {
--- a/examples/vision/facealign/face_landmark_1000/python/infer.py
+++ b/examples/vision/facealign/face_landmark_1000/python/infer.py
@@ -38,7 +38,7 @@ def build_option(args):
        if backend == "ort":
            option.use_ort_backend()
        elif backend == "paddle":
-            option.use_paddle_backend()
+            option.use_paddle_infer_backend()
        elif backend in ["trt", "paddle_trt"]:
            option.use_trt_backend()
            option.set_trt_input_shape("input", [1, 3, 112, 112])
@@ -58,7 +58,7 @@ def build_option(args):
        elif backend == "ov":
            option.use_openvino_backend()
        elif backend == "paddle":
-            option.use_paddle_backend()
+            option.use_paddle_infer_backend()
        elif backend == "default":
            return option
        else:
--- a/examples/vision/facealign/pfld/cpp/infer.cc
+++ b/examples/vision/facealign/pfld/cpp/infer.cc
@@ -40,7 +40,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
    if (FLAGS_backend == "ort") {
      option->UseOrtBackend();
    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleBackend();
+      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "trt" || 
               FLAGS_backend == "paddle_trt") {
      option->UseTrtBackend();
@@ -63,7 +63,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
    } else if (FLAGS_backend == "ov") {
      option->UseOpenVINOBackend();
    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleBackend();
+      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "default") {
      return true;
    } else {
--- a/examples/vision/facealign/pfld/python/infer.py
+++ b/examples/vision/facealign/pfld/python/infer.py
@@ -37,7 +37,7 @@ def build_option(args):
        if backend == "ort":
            option.use_ort_backend()
        elif backend == "paddle":
-            option.use_paddle_backend()
+            option.use_paddle_infer_backend()
        elif backend in ["trt", "paddle_trt"]:
            option.use_trt_backend()
            option.set_trt_input_shape("input", [1, 3, 112, 112])
@@ -57,7 +57,7 @@ def build_option(args):
        elif backend == "ov":
            option.use_openvino_backend()
        elif backend == "paddle":
-            option.use_paddle_backend()
+            option.use_paddle_infer_backend()
        elif backend == "default":
            return option
        else:
--- a/examples/vision/facealign/pipnet/cpp/infer.cc
+++ b/examples/vision/facealign/pipnet/cpp/infer.cc
@@ -41,7 +41,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
    if (FLAGS_backend == "ort") {
      option->UseOrtBackend();
    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleBackend();
+      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "trt" || 
               FLAGS_backend == "paddle_trt") {
      option->UseTrtBackend();
@@ -64,7 +64,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
    } else if (FLAGS_backend == "ov") {
      option->UseOpenVINOBackend();
    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleBackend();
+      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "default") {
      return true;
    } else {
--- a/examples/vision/facealign/pipnet/python/infer.py
+++ b/examples/vision/facealign/pipnet/python/infer.py
@@ -42,7 +42,7 @@ def build_option(args):
        if backend == "ort":
            option.use_ort_backend()
        elif backend == "paddle":
-            option.use_paddle_backend()
+            option.use_paddle_infer_backend()
        elif backend in ["trt", "paddle_trt"]:
            option.use_trt_backend()
            option.set_trt_input_shape("input", [1, 3, 112, 112])
@@ -62,7 +62,7 @@ def build_option(args):
        elif backend == "ov":
            option.use_openvino_backend()
        elif backend == "paddle":
-            option.use_paddle_backend()
+            option.use_paddle_infer_backend()
        elif backend == "default":
            return option
        else:
--- a/examples/vision/headpose/fsanet/cpp/infer.cc
+++ b/examples/vision/headpose/fsanet/cpp/infer.cc
@@ -40,7 +40,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
    if (FLAGS_backend == "ort") {
      option->UseOrtBackend();
    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleBackend();
+      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "trt" || 
               FLAGS_backend == "paddle_trt") {
      option->UseTrtBackend();
@@ -63,7 +63,7 @@ bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
    } else if (FLAGS_backend == "ov") {
      option->UseOpenVINOBackend();
    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleBackend();
+      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "default") {
      return true;
    } else {
--- a/examples/vision/headpose/fsanet/python/infer.py
+++ b/examples/vision/headpose/fsanet/python/infer.py
@@ -37,7 +37,7 @@ def build_option(args):
        if backend == "ort":
            option.use_ort_backend()
        elif backend == "paddle":
-            option.use_paddle_backend()
+            option.use_paddle_infer_backend()
        elif backend in ["trt", "paddle_trt"]:
            option.use_trt_backend()
            option.set_trt_input_shape("input", [1, 3, 64, 64])
@@ -57,7 +57,7 @@ def build_option(args):
        elif backend == "ov":
            option.use_openvino_backend()
        elif backend == "paddle":
-            option.use_paddle_backend()
+            option.use_paddle_infer_backend()
        elif backend == "default":
            return option
        else:
--- a/examples/vision/matting/ppmatting/cpp/infer.cc
+++ b/examples/vision/matting/ppmatting/cpp/infer.cc
@@ -60,7 +60,7 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file,

  auto option = fastdeploy::RuntimeOption();
  option.UseGpu();
-  option.UsePaddleBackend();
+  option.UsePaddleInferBackend();
  auto model = fastdeploy::vision::matting::PPMatting(model_file, params_file,
                                                      config_file, option);
  if (!model.Initialized()) {
--- a/examples/vision/matting/ppmatting/python/infer.py
+++ b/examples/vision/matting/ppmatting/python/infer.py
@@ -34,7 +34,7 @@ def build_option(args):
    option = fd.RuntimeOption()
    if args.device.lower() == "gpu":
        option.use_gpu()
-        option.use_paddle_backend()
+        option.use_paddle_infer_backend()

    if args.use_trt:
        option.use_trt_backend()
--- a/examples/vision/ocr/PP-OCRv2/python/infer.py
+++ b/examples/vision/ocr/PP-OCRv2/python/infer.py
@@ -81,7 +81,7 @@ def build_option(args):
    elif args.backend.lower() == "ort":
        option.use_ort_backend()
    elif args.backend.lower() == "paddle":
-        option.use_paddle_backend()
+        option.use_paddle_infer_backend()
    elif args.backend.lower() == "openvino":
        assert args.device.lower(
        ) == "cpu", "OpenVINO backend require inference on device CPU."
--- a/examples/vision/ocr/PP-OCRv2/serving/README.md
+++ b/examples/vision/ocr/PP-OCRv2/serving/README.md
@@ -0,0 +1,12 @@
+# PP-OCRv2服务化部署示例
+
+除了`下载的模型`和`rec前处理的1个参数`以外，PP-OCRv2的服务化部署与PP-OCRv3服务化部署全部一样，请参考[PP-OCRv3服务化部署](../../PP-OCRv3/serving)。
+
+## 下载模型
+将下载链接中的`v3`改为`v2`即可。
+
+## 修改rec前处理参数
+在[model.py](../../PP-OCRv3/serving/models/det_postprocess/1/model.py#L109)文件**109行添加以下代码**：
+```
+self.rec_preprocessor.cls_image_shape[1] = 32
+```
--- a/examples/vision/ocr/PP-OCRv3/python/infer.py
+++ b/examples/vision/ocr/PP-OCRv3/python/infer.py
@@ -81,7 +81,7 @@ def build_option(args):
    elif args.backend.lower() == "ort":
        option.use_ort_backend()
    elif args.backend.lower() == "paddle":
-        option.use_paddle_backend()
+        option.use_paddle_infer_backend()
    elif args.backend.lower() == "openvino":
        assert args.device.lower(
        ) == "cpu", "OpenVINO backend require inference on device CPU."
--- a/examples/vision/segmentation/paddleseg/README.md
+++ b/examples/vision/segmentation/paddleseg/README.md
@@ -16,28 +16,30 @@

 ## 准备PaddleSeg部署模型

-PaddleSeg模型导出，请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/docs/model_export_cn.md)  
+PaddleSeg模型导出，请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export_cn.md)  

 **注意**
 - PaddleSeg导出的模型包含`model.pdmodel`、`model.pdiparams`和`deploy.yaml`三个文件，FastDeploy会从yaml文件中获取模型在推理时需要的预处理信息
- aarch64平台（如：Jetson）暂时只支持`onnxruntime`和`tensorrt`作为后端推理（**不支持**非固定shape的图片输入即动态输入）。因此，**必须指定**`--input_shape`导出具有固定输入的PaddleSeg模型（FastDeploy会在预处理阶段，对原图进行resize操作）
- 在使用其他平台（如：Windows、Mac、Linux），在导出PaddleSeg模型模型时，可指定`--input_shape`参数（当想采用`onnxruntime`或`tensorrt`作为后端进行推理）。但是，若输入的预测图片尺寸并不固定，建议使用默认值即**不指定**该参数（同时采用Paddle Inference或者OpenVino作为后端进行推理）

 ## 下载预训练模型

-为了方便开发者的测试，下面提供了PaddleSeg导出的部分模型（导出方式为：**不指定**`--input_shape`，**指定**`--output_op none`），开发者可直接下载使用。
+为了方便开发者的测试，下面提供了PaddleSeg导出的部分模型：
+- without-argmax导出方式为：**不指定**`--input_shape`，**指定**`--output_op none`
+- with-argmax导出方式为：**不指定**`--input_shape`，**指定**`--output_op argmax`
+
+开发者可直接下载使用。

 | 模型                                                               | 参数文件大小    |输入Shape |  mIoU | mIoU (flip) | mIoU (ms+flip) |
 |:---------------------------------------------------------------- |:----- |:----- | :----- | :----- | :----- |
-| [Unet-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz) | 52MB | 1024x512 | 65.00% | 66.02% | 66.89% |
-| [PP-LiteSeg-T(STDC1)-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer.tgz)  | 31MB  | 1024x512 |77.04% | 77.73% | 77.46% |
-| [PP-HumanSegV1-Lite(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) |  543KB | 192x192 | 86.2% | - | - |
-| [PP-HumanSegV2-Lite(通用人像分割模型)](https://bj.bcebos.com/paddle2onnx/libs/PP_HumanSegV2_Lite_192x192_infer.tgz) |  12MB | 192x192 | 92.52% | - | - |
-| [PP-HumanSegV2-Mobile(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) |  29MB | 192x192 | 93.13% | - | - |
-| [PP-HumanSegV1-Server(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) |  103MB | 512x512 | 96.47% | - | - |
-| [Portait-PP-HumanSegV2_Lite(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) |  3.6M | 256x144 | 96.63% | - | - |
-| [FCN-HRNet-W18-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz) |  37MB | 1024x512 | 78.97% | 79.49% | 79.74% |
-| [Deeplabv3-ResNet101-OS8-cityscapes](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) |  150MB | 1024x512 | 79.90% | 80.22% | 80.47% |
+| [Unet-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_with_argmax_infer.tgz) \| [Unet-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Unet_cityscapes_without_argmax_infer.tgz)  | 52MB | 1024x512 | 65.00% | 66.02% | 66.89% |
+| [PP-LiteSeg-B(STDC2)-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz) \| [PP-LiteSeg-B(STDC2)-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_without_argmax_infer.tgz) | 31MB  | 1024x512 | 79.04% |	79.52% | 79.85% |
+|[PP-HumanSegV1-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV1_Lite_with_argmax_infer.tgz) \| [PP-HumanSegV1-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Lite_infer.tgz) |  543KB | 192x192 | 86.2% | - | - |
+|[PP-HumanSegV2-Lite-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Lite-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Lite_192x192_infer.tgz) |  12MB | 192x192 | 92.52% | - | - |
+| [PP-HumanSegV2-Mobile-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_with_argmax_infer.tgz) \| [PP-HumanSegV2-Mobile-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV2_Mobile_192x192_infer.tgz) |  29MB | 192x192 | 93.13% | - | - |
+|[PP-HumanSegV1-Server-with-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_with_argmax_infer.tgz) \| [PP-HumanSegV1-Server-without-argmax(通用人像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/PP_HumanSegV1_Server_infer.tgz) |  103MB | 512x512 | 96.47% | - | - |
+| [Portrait-PP-HumanSegV2-Lite-with-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_with_argmax_infer.tgz) \| [Portrait-PP-HumanSegV2-Lite-without-argmax(肖像分割模型)](https://bj.bcebos.com/paddlehub/fastdeploy/Portrait_PP_HumanSegV2_Lite_256x144_infer.tgz) |  3.6MB | 256x144 | 96.63% | - | - |
+| [FCN-HRNet-W18-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_with_argmax_infer.tgz) \| [FCN-HRNet-W18-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/FCN_HRNet_W18_cityscapes_without_argmax_infer.tgz)(暂时不支持ONNXRuntime的GPU推理) |  37MB | 1024x512 | 78.97% | 79.49% | 79.74% |
+| [Deeplabv3-ResNet101-OS8-cityscapes-with-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_with_argmax_infer.tgz) \| [Deeplabv3-ResNet101-OS8-cityscapes-without-argmax](https://bj.bcebos.com/paddlehub/fastdeploy/Deeplabv3_ResNet101_OS8_cityscapes_without_argmax_infer.tgz) |  150MB | 1024x512 | 79.90% | 80.22% | 80.47% |

 ## 详细部署文档

--- a/examples/vision/segmentation/paddleseg/quantize/cpp/infer.cc
+++ b/examples/vision/segmentation/paddleseg/quantize/cpp/infer.cc
@@ -57,7 +57,7 @@ void InitAndInfer(const std::string& model_dir, const std::string& image_file,

 //   fastdeploy::RuntimeOption option;
 //   option.UseCpu();
-//   option.UsePaddleBackend();
+//   option.UsePaddleInferBackend();
 //   std::cout<<"Xyy-debug, enable Paddle Backend==!";

 //   std::string model_dir = argv[1];
@@ -89,7 +89,7 @@ int main(int argc, char* argv[]) {
    std::cout<<"Use ORT!"<<std::endl;
  } else if (flag == 1) {
    option.UseCpu();
-    option.UsePaddleBackend();
+    option.UsePaddleInferBackend();
    std::cout<<"Use PP!"<<std::endl;
  }

--- a/examples/vision/segmentation/paddleseg/quantize/python/infer.py
+++ b/examples/vision/segmentation/paddleseg/quantize/python/infer.py
@@ -52,7 +52,7 @@ def build_option(args):
    elif args.backend.lower() == "ort":
        option.use_ort_backend()
    elif args.backend.lower() == "paddle":
-        option.use_paddle_backend()
+        option.use_paddle_infer_backend()
    elif args.backend.lower() == "openvino":
        assert args.device.lower(
        ) == "cpu", "OpenVINO backend require inference on device CPU."
--- a/java/android/app/src/main/java/com/baidu/paddle/fastdeploy/app/examples/segmentation/SegmentationMainActivity.java
+++ b/java/android/app/src/main/java/com/baidu/paddle/fastdeploy/app/examples/segmentation/SegmentationMainActivity.java
@@ -250,12 +250,10 @@ public class SegmentationMainActivity extends Activity implements View.OnClickLi
        }

        boolean modified = false;
-
-        long tc = System.currentTimeMillis();
-
        SegmentationResult result = new SegmentationResult();
        result.setCxxBufferFlag(true);

+        long tc = System.currentTimeMillis();
        predictor.predict(ARGB8888ImageBitmap, result);
        timeElapsed += (System.currentTimeMillis() - tc);

--- a/java/android/fastdeploy/src/main/cpp/fastdeploy_jni/vision/results_jni.cc
+++ b/java/android/fastdeploy/src/main/cpp/fastdeploy_jni/vision/results_jni.cc
@@ -859,6 +859,13 @@ bool AllocateSegmentationResultFromJava(
    return false;
  }

+  // mInitialized boolean:         Z
+  jboolean j_seg_initialized =
+      env->GetBooleanField(j_seg_result_obj, j_seg_initialized_id_cc);
+  if (j_seg_initialized == JNI_FALSE) {
+    return false;
+  }
+
  // If 'mEnableCxxBuffer' set as true, then, we only Allocate from
  // cxx context to cxx result. Some users may want to use this
  // method to boost the performance of segmentation.
@@ -872,32 +879,23 @@ bool AllocateSegmentationResultFromJava(
    }
    // Allocate from cxx context to cxx result
    auto c_cxx_buffer = reinterpret_cast<vision::SegmentationResult *>(j_cxx_buffer);
-    // TODO: May use 'swap' to exchange the administrative privileges ?
-    // c_result_ptr->shape.swap(c_cxx_buffer->shape);
-    // c_result_ptr->label_map.swap(c_cxx_buffer->label_map);
-    // c_result_ptr->contain_score_map = c_cxx_buffer->contain_score_map;
-    // if (c_cxx_buffer->contain_score_map) {
-    //   c_result_ptr->score_map.swap(c_cxx_buffer->score_map);
-    // }
-    c_result_ptr->shape.assign(
-        c_cxx_buffer->shape.begin(), c_cxx_buffer->shape.end());
-    c_result_ptr->label_map.assign(
-        c_cxx_buffer->label_map.begin(), c_cxx_buffer->label_map.end());
+
+    // (*c_result_ptr) = std::move(*c_cxx_buffer);
+    c_result_ptr->shape = c_cxx_buffer->shape;
+    const size_t label_len = c_cxx_buffer->label_map.size();
+    c_result_ptr->label_map.resize(label_len);
+    std::memcpy(c_result_ptr->label_map.data(), c_cxx_buffer->label_map.data(),
+                label_len * sizeof(uint8_t));
    c_result_ptr->contain_score_map = c_cxx_buffer->contain_score_map;
    if (c_cxx_buffer->contain_score_map) {
-      c_result_ptr->score_map.assign(
-          c_cxx_buffer->score_map.begin(), c_cxx_buffer->score_map.end());
+      const size_t score_len = c_cxx_buffer->score_map.size();
+      c_result_ptr->score_map.resize(score_len);
+      std::memcpy(c_result_ptr->score_map.data(), c_cxx_buffer->score_map.data(),
+                  score_len * sizeof(float));
    }
    return true;
  }

-  // mInitialized boolean:         Z
-  jboolean j_seg_initialized =
-      env->GetBooleanField(j_seg_result_obj, j_seg_initialized_id_cc);
-  if (j_seg_initialized == JNI_FALSE) {
-    return false;
-  }
-
  jbyteArray j_seg_label_map_byte_arr = reinterpret_cast<jbyteArray>(
      env->GetObjectField(j_seg_result_obj, j_seg_label_map_id_cc));
  jlongArray j_seg_shape_long_arr = reinterpret_cast<jlongArray>(
--- a/java/android/fastdeploy/src/main/cpp/fastdeploy_jni/vision/segmentation/paddleseg_model_jni.cc
+++ b/java/android/fastdeploy/src/main/cpp/fastdeploy_jni/vision/segmentation/paddleseg_model_jni.cc
@@ -49,7 +49,7 @@ Java_com_baidu_paddle_fastdeploy_vision_segmentation_PaddleSegModel_bindNative(
  jboolean j_is_vertical_screen = env->GetBooleanField(
      thiz, j_is_vertical_screen_id);
  bool c_is_vertical_screen = static_cast<jboolean>(j_is_vertical_screen);
-  c_model_ptr->is_vertical_screen = c_is_vertical_screen;
+  c_model_ptr->GetPreprocessor().SetIsVerticalScreen(c_is_vertical_screen);
  env->DeleteLocalRef(j_ppseg_clazz);

  vision::EnableFlyCV();
--- a/java/android/fastdeploy/src/main/cpp/fastdeploy_jni/vision/visualize_jni.cc
+++ b/java/android/fastdeploy/src/main/cpp/fastdeploy_jni/vision/visualize_jni.cc
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include <jni.h>  // NOLINT
+#include "fastdeploy_jni/perf_jni.h"  // NOLINT
 #include "fastdeploy_jni/bitmap_jni.h"  // NOLINT
 #include "fastdeploy_jni/convert_jni.h" // NOLINT
 #include "fastdeploy_jni/vision/results_jni.h"  // NOLINT
@@ -19,16 +20,21 @@
 namespace fni = fastdeploy::jni;
 namespace vision = fastdeploy::vision;

-#ifdef __cplusplus
-extern "C" {
-#endif
+namespace fastdeploy {
+namespace jni {

-/// VisClassification
-JNIEXPORT jboolean JNICALL
-Java_com_baidu_paddle_fastdeploy_vision_Visualize_visClassificationNative(
-    JNIEnv *env, jclass clazz, jobject argb8888_bitmap,
+/// Some visualize helpers.
+jboolean VisClassificationFromJava(
+    JNIEnv *env, jobject argb8888_bitmap,
    jobject result, jfloat score_threshold, jfloat font_size,
    jobjectArray labels) {
+  const jclass j_cls_result_clazz = env->FindClass(
+      "com/baidu/paddle/fastdeploy/vision/ClassifyResult");
+  if (!env->IsInstanceOf(result, j_cls_result_clazz)) {
+    env->DeleteLocalRef(j_cls_result_clazz);
+    return JNI_FALSE;
+  }
+  env->DeleteLocalRef(j_cls_result_clazz);
  vision::ClassifyResult c_result;
  if (!fni::AllocateCxxResultFromJava(
      env, result, reinterpret_cast<void *>(&c_result),
@@ -56,19 +62,23 @@ Java_com_baidu_paddle_fastdeploy_vision_Visualize_visClassificationNative(
  return JNI_TRUE;
 }

-/// VisDetection
-JNIEXPORT jboolean JNICALL
-Java_com_baidu_paddle_fastdeploy_vision_Visualize_visDetectionNative(
-    JNIEnv *env, jclass clazz, jobject argb8888_bitmap,
+jboolean VisDetectionFromJava(
+    JNIEnv *env, jobject argb8888_bitmap,
    jobject result, jfloat score_threshold, jint line_size,
    jfloat font_size, jobjectArray labels) {
+  const jclass j_det_result_clazz = env->FindClass(
+      "com/baidu/paddle/fastdeploy/vision/DetectionResult");
+  if (!env->IsInstanceOf(result, j_det_result_clazz)) {
+    env->DeleteLocalRef(j_det_result_clazz);
+    return JNI_FALSE;
+  }
+  env->DeleteLocalRef(j_det_result_clazz);
  vision::DetectionResult c_result;
  if (!fni::AllocateCxxResultFromJava(
      env, result, reinterpret_cast<void *>(&c_result),
      vision::ResultType::DETECTION)) {
    return JNI_FALSE;
  }
-
  // Get labels from Java [n]
  auto c_labels = fni::ConvertTo<std::vector<std::string>>(env, labels);

@@ -90,11 +100,15 @@ Java_com_baidu_paddle_fastdeploy_vision_Visualize_visDetectionNative(
  return JNI_TRUE;
 }

-/// VisOcr
-JNIEXPORT jboolean JNICALL
-Java_com_baidu_paddle_fastdeploy_vision_Visualize_visOcrNative(
-    JNIEnv *env, jclass clazz, jobject argb8888_bitmap,
-    jobject result) {
+jboolean VisOcrFromJava(
+    JNIEnv *env, jobject argb8888_bitmap, jobject result) {
+  const jclass j_ocr_result_clazz = env->FindClass(
+      "com/baidu/paddle/fastdeploy/vision/OCRResult");
+  if (!env->IsInstanceOf(result, j_ocr_result_clazz)) {
+    env->DeleteLocalRef(j_ocr_result_clazz);
+    return JNI_FALSE;
+  }
+  env->DeleteLocalRef(j_ocr_result_clazz);
  vision::OCRResult c_result;
  if (!fni::AllocateCxxResultFromJava(
      env, result, reinterpret_cast<void *>(&c_result),
@@ -113,17 +127,22 @@ Java_com_baidu_paddle_fastdeploy_vision_Visualize_visOcrNative(
  return JNI_TRUE;
 }

-JNIEXPORT jboolean JNICALL
-Java_com_baidu_paddle_fastdeploy_vision_Visualize_visSegmentationNative(
-    JNIEnv *env, jclass clazz, jobject argb8888_bitmap,
-    jobject result, jfloat weight) {
+jboolean VisSegmentationFromJava(
+    JNIEnv *env, jobject argb8888_bitmap, jobject result, jfloat weight) {
+  const jclass j_seg_result_clazz = env->FindClass(
+      "com/baidu/paddle/fastdeploy/vision/SegmentationResult");
+  if (!env->IsInstanceOf(result, j_seg_result_clazz)) {
+    env->DeleteLocalRef(j_seg_result_clazz);
+    return JNI_FALSE;
+  }
+  env->DeleteLocalRef(j_seg_result_clazz);
+  // Allocate from Java result, may cost some times.
  vision::SegmentationResult c_result;
  if (!fni::AllocateCxxResultFromJava(
      env, result, reinterpret_cast<void *>(&c_result),
      vision::ResultType::SEGMENTATION)) {
    return JNI_FALSE;
  }
-
  cv::Mat c_bgr;
  if (!fni::ARGB888Bitmap2BGR(env, argb8888_bitmap, &c_bgr)) {
    return JNI_FALSE;
@@ -135,17 +154,66 @@ Java_com_baidu_paddle_fastdeploy_vision_Visualize_visSegmentationNative(
  return JNI_TRUE;
 }

-JNIEXPORT jboolean JNICALL
-Java_com_baidu_paddle_fastdeploy_vision_Visualize_visFaceDetectionNative(
-    JNIEnv *env, jclass clazz, jobject argb8888_bitmap,
+jboolean VisSegmentationFromCxxBuffer(
+    JNIEnv *env, jobject argb8888_bitmap, jobject result, jfloat weight) {
+  const jclass j_seg_result_clazz = env->FindClass(
+      "com/baidu/paddle/fastdeploy/vision/SegmentationResult");
+  if (!env->IsInstanceOf(result, j_seg_result_clazz)) {
+    env->DeleteLocalRef(j_seg_result_clazz);
+    return JNI_FALSE;
+  }
+  const jfieldID j_enable_cxx_buffer_id = env->GetFieldID(
+      j_seg_result_clazz, "mEnableCxxBuffer", "Z");
+  const jfieldID  j_cxx_buffer_id = env->GetFieldID(
+      j_seg_result_clazz, "mCxxBuffer", "J");
+  const jfieldID j_seg_initialized_id = env->GetFieldID(
+      j_seg_result_clazz, "mInitialized", "Z");
+  jboolean j_enable_cxx_buffer =
+      env->GetBooleanField(result, j_enable_cxx_buffer_id);
+  jboolean j_seg_initialized =
+      env->GetBooleanField(result, j_seg_initialized_id);
+
+  env->DeleteLocalRef(j_seg_result_clazz);
+  if (j_seg_initialized == JNI_FALSE) {
+    return JNI_FALSE;
+  }
+  // Fast path: draw straight from the native result referenced by mCxxBuffer, without copying it.
+  if (j_enable_cxx_buffer == JNI_TRUE) {
+    jlong j_cxx_buffer = env->GetLongField(result, j_cxx_buffer_id);
+    if (j_cxx_buffer == 0) {
+      return JNI_FALSE;
+    }
+    // The jlong is reinterpreted as a vision::SegmentationResult pointer and used in place.
+    auto c_cxx_buffer = reinterpret_cast<vision::SegmentationResult *>(j_cxx_buffer);
+    cv::Mat c_bgr;
+    if (!fni::ARGB888Bitmap2BGR(env, argb8888_bitmap, &c_bgr)) {
+      return JNI_FALSE;
+    }
+    auto c_vis_im = vision::VisSegmentation(c_bgr, *c_cxx_buffer, weight);
+    if (!fni::BGR2ARGB888Bitmap(env, argb8888_bitmap, c_vis_im)) {
+      return JNI_FALSE;
+    }
+    return JNI_TRUE;
+  }
+  return JNI_FALSE;
+}
+
+jboolean VisFaceDetectionFromJava(
+    JNIEnv *env, jobject argb8888_bitmap,
    jobject result, jint line_size, jfloat font_size) {
+  const jclass j_face_det_result_clazz = env->FindClass(
+      "com/baidu/paddle/fastdeploy/vision/FaceDetectionResult");
+  if (!env->IsInstanceOf(result, j_face_det_result_clazz)) {
+    env->DeleteLocalRef(j_face_det_result_clazz);
+    return JNI_FALSE;
+  }
+  env->DeleteLocalRef(j_face_det_result_clazz);
  vision::FaceDetectionResult c_result;
  if (!fni::AllocateCxxResultFromJava(
      env, result, reinterpret_cast<void *>(&c_result),
      vision::ResultType::FACE_DETECTION)) {
    return JNI_FALSE;
  }
-
  cv::Mat c_bgr;
  if (!fni::ARGB888Bitmap2BGR(env, argb8888_bitmap, &c_bgr)) {
    return JNI_FALSE;
@@ -157,6 +225,64 @@ Java_com_baidu_paddle_fastdeploy_vision_Visualize_visFaceDetectionNative(
  return JNI_TRUE;
 }

+}  // jni
+}  // fastdeploy
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// VisClassification: JNI entry point that forwards to fni::VisClassificationFromJava (clazz is unused).
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_fastdeploy_vision_Visualize_visClassificationNative(
+    JNIEnv *env, jclass clazz, jobject argb8888_bitmap,
+    jobject result, jfloat score_threshold, jfloat font_size,
+    jobjectArray labels) {
+  return fni::VisClassificationFromJava(env, argb8888_bitmap, result,
+                                        score_threshold, font_size, labels);
+}
+
+/// VisDetection: JNI entry point that forwards to fni::VisDetectionFromJava (clazz is unused).
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_fastdeploy_vision_Visualize_visDetectionNative(
+    JNIEnv *env, jclass clazz, jobject argb8888_bitmap,
+    jobject result, jfloat score_threshold, jint line_size,
+    jfloat font_size, jobjectArray labels) {
+  return fni::VisDetectionFromJava(env, argb8888_bitmap, result, score_threshold,
+                                   line_size, font_size, labels);
+}
+
+/// VisOcr: JNI entry point that forwards to fni::VisOcrFromJava (clazz is unused).
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_fastdeploy_vision_Visualize_visOcrNative(
+    JNIEnv *env, jclass clazz, jobject argb8888_bitmap,
+    jobject result) {
+  return fni::VisOcrFromJava(env, argb8888_bitmap, result);
+}
+
+/// VisSegmentation: JNI entry point; tries the CxxBuffer path first, then the Java-result path.
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_fastdeploy_vision_Visualize_visSegmentationNative(
+    JNIEnv *env, jclass clazz, jobject argb8888_bitmap,
+    jobject result, jfloat weight) {
+  // First, try to visualize the segmentation result directly via the CxxBuffer.
+  if (fni::VisSegmentationFromCxxBuffer(
+      env, argb8888_bitmap, result, weight)) {
+    return JNI_TRUE;
+  }
+  // If that returns false, fall back to visualizing from the Java result (may take some time).
+  return fni::VisSegmentationFromJava(env, argb8888_bitmap, result, weight);
+}
+
+/// VisFaceDetection: JNI entry point that forwards to fni::VisFaceDetectionFromJava (clazz is unused).
+JNIEXPORT jboolean JNICALL
+Java_com_baidu_paddle_fastdeploy_vision_Visualize_visFaceDetectionNative(
+    JNIEnv *env, jclass clazz, jobject argb8888_bitmap,
+    jobject result, jint line_size, jfloat font_size) {
+  return fni::VisFaceDetectionFromJava(env, argb8888_bitmap, result,
+                                       line_size, font_size);
+}
+
 #ifdef __cplusplus
 }
 #endif
--- a/serving/README_CN.md
+++ b/serving/README_CN.md
@@ -32,6 +32,7 @@ docker pull paddlepaddle/fastdeploy:0.6.0-gpu-cuda11.4-trt8.4-21.10
 ## 其它文档
 - [模型仓库目录说明](docs/zh_CN/model_repository.md) (说明如何准备模型仓库目录)
 - [模型配置说明](docs/zh_CN/model_configuration.md)  (说明runtime的配置选项)
+- [服务化部署示例](docs/zh_CN/demo.md) (以yolov5为例说明服务化部署的基本原理与配置)


 ### 服务化部署示例
--- a/serving/README_EN.md
+++ b/serving/README_EN.md
@@ -39,6 +39,7 @@ Users can also compile the image by themselves according to their own needs, ref

 - [How to Prepare Serving Model Repository](docs/zh_CN/model_repository.md)
 - [Serving Deployment Configuration for Runtime](docs/zh_CN/model_configuration.md)
+- [Demo of Serving Deployment](docs/zh_CN/demo.md)


 ### Serving Deployment Demo
--- a/serving/docs/zh_CN/demo.md
+++ b/serving/docs/zh_CN/demo.md
@@ -6,30 +6,30 @@
 ## 基本原理介绍
 像常见的深度学习模型一样，yolov5完整的运行过程包含前处理+模型预测+后处理三个阶段。

-在Triton中，将前处理、模型预测、后处理均视为1个**Triton-Model**，每个Triton-Model的**config.pbtxt**配置文件中均描述了其输入数据格式、输出数据格式、Triton-Model的类型（即config.pbtxt中的**backend**或**platform**字段）、以及其他的一些配置选项。
+在FastDeployServer中，将前处理、模型预测、后处理均视为1个**模型服务**，每个模型服务的**config.pbtxt**配置文件中均描述了其输入数据格式、输出数据格式、模型服务的类型（即config.pbtxt中的**backend**或**platform**字段）、以及其他的一些配置选项。

-前处理和后处理一般是运行一段Python代码，为了方便后续描述，我们称之为**Python-Triton-Model**，其config.pbtxt配置文件中的`backend: "python"`。
+前处理和后处理一般是运行一段Python代码，为了方便后续描述，我们称之为**Python模型服务**，其config.pbtxt配置文件中的`backend: "python"`。

-模型预测阶段是深度学习模型预测引擎（如ONNXRuntime、Paddle、TRT、FastDeploy）加载用户提供的深度学习模型文件来运行模型预测，我们称之为**Runtime-Triton-Model**，其config.pbtxt配置文件中的`backend: "fastdeploy"`。
+模型预测阶段是深度学习模型预测引擎加载用户提供的深度学习模型文件来运行模型预测，我们称之为**Runtime模型服务**，其config.pbtxt配置文件中的`backend: "fastdeploy"`。

 根据用户提供的模型类型的不同，可以在**optimization**字段中设置使用CPU、GPU、TRT、ONNX等配置，配置方法参考[服务化部署配置说明](model_configuration.md)。

-除此之外，还需要一个**Ensemble-Triton-Model**来将前处理、模型预测、后处理3个**Triton-Model**组合为1个整体，并描述3个Triton-Model之间的关联关系。例如，前处理的输出与模型预测的输入之间的对应关系，多个Triton-Model的调用顺序、串并联关系等，**Ensemble-Triton-Model**的config.pbtxt配置文件中的`platform: "ensemble"`。
+除此之外，还需要一个**Ensemble模型服务**来将前处理、模型预测、后处理3个**模型服务**组合为1个整体，并描述3个模型服务之间的关联关系。例如，前处理的输出与模型预测的输入之间的对应关系，多个模型服务的调用顺序、串并联关系等，**Ensemble模型服务**的config.pbtxt配置文件中的`platform: "ensemble"`。

-在本文的yolov5服务化示例中，**Ensemble-Triton-Model**将前处理、模型预测、后处理3个**Triton-Model**串联组合为1个整体，整体的结构如下图所示。
+在本文的yolov5服务化示例中，**Ensemble模型服务**将前处理、模型预测、后处理3个**模型服务**串联组合为1个整体，整体的结构如下图所示。
 <p align="center">
    <br>
 <img src='https://user-images.githubusercontent.com/35565423/204268774-7b2f6b4a-50b1-4962-ade9-cd10cf3897ab.png'>
    <br>
 </p>
  
-对于像[OCR这样多个深度学习模型的组合模型](../../../examples/vision/ocr/PP-OCRv3/serving)，或者[流式输入输出的深度学习模型](../../../examples/audio/pp-tts/serving)，其**Ensemble-Triton-Model**会更加复杂。
+对于像[OCR这样多个深度学习模型的组合模型](../../../examples/vision/ocr/PP-OCRv3/serving)，或者[流式输入输出的深度学习模型](../../../examples/audio/pp-tts/serving)，其**Ensemble模型服务**的配置会更加复杂。
  
  
-## Python-Triton-Model简介
-我们以[yolov5前处理](../../../examples/vision/detection/yolov5/serving/models/preprocess/1/model.py)为例，简单介绍一下编写Python-Triton-Model中的注意事项。
+## Python模型服务简介
+我们以[yolov5前处理](../../../examples/vision/detection/yolov5/serving/models/preprocess/1/model.py)为例，简单介绍一下编写Python模型服务中的注意事项。

-Python-Triton-Model代码model.py的整体结构框架如下所示。Python代码的核心是1个`class TritonPythonModel`类，类中包含3个成员函数`initialize`、`execute`、`finalize`，类名、成员函数名、函数输入变量都不允许更改。在此基础上，用户可以自行编写代码。
+Python模型服务的代码model.py的整体结构框架如下所示。Python代码的核心是1个`class TritonPythonModel`类，类中包含3个成员函数`initialize`、`execute`、`finalize`，类名、成员函数名、函数输入变量都不允许更改。在此基础上，用户可以自行编写代码。

 ```
 import json
@@ -97,9 +97,9 @@ class TritonPythonModel:
        #你的析构代码，finalize只在模型卸载的时候被调用1次
 ```

-`initialize`中一般放置初始化的一些操作，该函数只在Python-Triton-Model被加载的时候执行1次。
+`initialize`中一般放置初始化的一些操作，该函数只在Python模型服务被加载的时候执行1次。

-`finalize`中一般放置一些析构释放的操作，该函数只在Python-Triton-Model被卸载的时候执行1次。
+`finalize`中一般放置一些析构释放的操作，该函数只在Python模型服务被卸载的时候执行1次。

 `execute`中放置用户需要的前后处理的逻辑，该函数在每次服务端收到客户端请求的时候被执行1次。

@@ -118,18 +118,18 @@ fd.vision.detection.YOLOv5.preprocess(data)
 动态合并Batch的原理如下图所示。当用户请求request并发量较大，但GPU利用率较小时，可以通过将不同用户的request组合为1个大的Batch进行模型预测，从而提高服务的吞吐性能。
 <p align="center">
    <br>
-<img src='https://user-images.githubusercontent.com/35565423/204268829-797112e1-bf75-486b-8931-07b44f659578.png'>
+<img src='https://user-images.githubusercontent.com/35565423/204285444-1f9aaf24-05c2-4aae-bbd5-47dc3582dc01.png'>
    <br>
 </p>

-开启动态合并Batch功能非常简单，仅需在config.pbtxt结尾处，增加`dynamic_batching{}`字段即可。
+开启动态合并Batch功能非常简单，仅需在config.pbtxt结尾处，增加`dynamic_batching{}`字段即可，但最大的合并Batch不会超过`max_batch_size`。

-**注意**：`ensemble_scheduling`字段与`dynamic_batching`字段不可共存，即对于**Ensemble-Triton-Model**不存在动态合并Batch功能，这也可以理解，因为**Ensemble-Triton-Model**本身仅仅是多个Triton-Model的组合。
+**注意**：`ensemble_scheduling`字段与`dynamic_batching`字段不可共存，即对于**Ensemble模型服务**不存在动态合并Batch功能，这也可以理解，因为**Ensemble模型服务**本身仅仅是多个模型服务的组合。

 ## 多模型实例
-多模型实例的原理如下图所示。当前后处理（通常不支持Batch）成为整个服务的性能瓶颈时，能通过增加多个前后处理的**Python-Triton-Model**实例，来提高整个服务的时延性能。
+多模型实例的原理如下图所示。当前后处理（通常不支持Batch）成为整个服务的性能瓶颈时，能通过增加多个前后处理的**Python模型服务**实例，来提高整个服务的时延性能。

-当然也可以开启多个**Runtime-Triton-Model**模型实例，来提升GPU利用率。
+当然也可以开启多个**Runtime模型服务**实例，来提升GPU利用率。
 <p align="center">
    <br>
 <img src='https://user-images.githubusercontent.com/35565423/204268809-6ea95a9f-e014-468a-8597-98b67ebc7381.png'>
--- a/tests/release_task/infer_ppyoloe.cc
+++ b/tests/release_task/infer_ppyoloe.cc
@@ -61,16 +61,15 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
+  if (!model.Predict(im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
@@ -109,16 +108,17 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  }

  auto im = cv::imread(image_file);
-  auto im_bak = im.clone();

  fastdeploy::vision::DetectionResult res;
-  if (!model.Predict(&im, &res)) {
-    std::cerr << "Failed to predict." << std::endl;
-    return;
+  for (size_t i = 0; i < 10; ++i) {
+    if (!model.Predict(im, &res)) {
+      std::cerr << "Failed to predict." << std::endl;
+      return;
+    }
  }

  std::cout << res.Str() << std::endl;
-  auto vis_im = fastdeploy::vision::Visualize::VisDetection(im_bak, res, 0.5);
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
  cv::imwrite("vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }
--- a/tests/release_task/infer_ppyoloe.py
+++ b/tests/release_task/infer_ppyoloe.py
@@ -70,7 +70,8 @@ model = fd.vision.detection.PPYOLOE(

 # 预测图片检测结果
 im = cv2.imread(args.image)
-result = model.predict(im.copy())
+for i in range(10):
+    result = model.predict(im)
 print(result)

 # 预测结果可视化