[Backend]Add stable_diffusion and detection models support for KunlunXin XPU (#954)

* [FlyCV] Bump up FlyCV -> official release 1.0.0
* add valid_xpu for detection
* add paddledetection model support for xpu
* support all detection model in c++ and python
* fix code
* add python stable_diffusion support

Co-authored-by: DefTruth <qiustudent_r@163.com>
Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
2025-10-04 16:22:57 +08:00 · 2022-12-26 16:22:52 +08:00
parent 8a986c23ec
commit 1911002b90
42 changed files with 857 additions and 38 deletions
--- a/examples/multimodal/stable_diffusion/README.md
+++ b/examples/multimodal/stable_diffusion/README.md
@@ -37,7 +37,11 @@ python infer.py --model_dir stable-diffusion-v1-4/ --scheduler "pndm" --backend
 如果使用stable-diffusion-v1-5模型，则可执行以下命令完成推理：

 ```
+# GPU上推理
 python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle
+
+# 在昆仑芯XPU上推理
+python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral" --backend paddle-xpu
 ```

 #### 参数说明
@@ -48,7 +52,7 @@ python infer.py --model_dir stable-diffusion-v1-5/ --scheduler "euler_ancestral"
 |----------|--------------|
 | --model_dir | 导出后模型的目录。 |
 | --model_format | 模型格式。默认为`'paddle'`，可选列表：`['paddle', 'onnx']`。 |
-| --backend | 推理引擎后端。默认为`paddle`，可选列表：`['onnx_runtime', 'paddle']`，当模型格式为`onnx`时，可选列表为`['onnx_runtime']`。 |
+| --backend | 推理引擎后端。默认为`paddle`，可选列表：`['onnx_runtime', 'paddle', 'paddle-xpu']`，当模型格式为`onnx`时，可选列表为`['onnx_runtime']`。 |
 | --scheduler | StableDiffusion 模型的scheduler。默认为`'pndm'`。可选列表：`['pndm', 'euler_ancestral']`，StableDiffusion模型对应的scheduler可参考[ppdiffuser模型列表](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/ppdiffusers/examples/textual_inversion)。|
 | --unet_model_prefix | UNet模型前缀。默认为`unet`。 |
 | --vae_model_prefix | VAE模型前缀。默认为`vae_decoder`。 |
--- a/examples/multimodal/stable_diffusion/infer.py
+++ b/examples/multimodal/stable_diffusion/infer.py
@@ -69,10 +69,7 @@ def parse_arguments():
        type=str,
        default='paddle',
        # Note(zhoushunjie): Will support 'tensorrt', 'paddle-tensorrt' soon.
-        choices=[
-            'onnx_runtime',
-            'paddle',
-        ],
+        choices=['onnx_runtime', 'paddle', 'paddle-xpu'],
        help="The inference runtime backend of unet model and text encoder model."
    )
    parser.add_argument(
@@ -178,6 +175,24 @@ def create_trt_runtime(model_dir,
    return fd.Runtime(option)


+def create_xpu_runtime(model_dir, model_prefix, device_id=0):
+    option = fd.RuntimeOption()
+    option.use_xpu(
+        device_id,
+        l3_workspace_size=(64 * 1024 * 1024 - 4 * 1024),
+        locked=False,
+        autotune=False,
+        autotune_file="",
+        precision="int16",
+        adaptive_seqlen=True,
+        enable_multi_stream=True)
+    option.use_paddle_lite_backend()
+    model_file = os.path.join(model_dir, model_prefix, "inference.pdmodel")
+    params_file = os.path.join(model_dir, model_prefix, "inference.pdiparams")
+    option.set_model_path(model_file, params_file)
+    return fd.Runtime(option)
+
+
 def get_scheduler(args):
    if args.scheduler == "pndm":
        scheduler = PNDMScheduler(
@@ -291,6 +306,20 @@ if __name__ == "__main__":
            dynamic_shape=unet_dynamic_shape,
            device_id=args.device_id)
        print(f"Spend {time.time() - start : .2f} s to load unet model.")
+    elif args.backend == "paddle-xpu":
+        print("=== build text_encoder_runtime")
+        text_encoder_runtime = create_xpu_runtime(
+            args.model_dir,
+            args.text_encoder_model_prefix,
+            device_id=args.device_id)
+        print("=== build vae_decoder_runtime")
+        vae_decoder_runtime = create_xpu_runtime(
+            args.model_dir, args.vae_model_prefix, device_id=args.device_id)
+        print("=== build unet_runtime")
+        start = time.time()
+        unet_runtime = create_xpu_runtime(
+            args.model_dir, args.unet_model_prefix, device_id=args.device_id)
+        print(f"Spend {time.time() - start : .2f} s to load unet model.")
    pipe = StableDiffusionFastDeployPipeline(
        vae_decoder_runtime=vae_decoder_runtime,
        text_encoder_runtime=text_encoder_runtime,
--- a/examples/vision/detection/paddledetection/cpp/README.md
+++ b/examples/vision/detection/paddledetection/cpp/README.md
@@ -32,6 +32,8 @@ tar xvf ppyoloe_crn_l_300e_coco.tgz
 ./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1
 # GPU上TensorRT推理
 ./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 2
+# 昆仑芯XPU推理
+./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 3
 ```

 以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考:  
--- a/examples/vision/detection/paddledetection/cpp/infer_faster_rcnn.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_faster_rcnn.cc
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs FasterRCNN detection on KunlunXin XPU and writes the visualized result
+// to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  // NOTE(review): arguments are presumably (device_id, l3_workspace_size,
+  // locked, autotune) — confirm against RuntimeOption::UseXpu.
+  option.UseXpu(0, 0, false, false);
+  auto model = fastdeploy::vision::detection::FasterRCNN(
+      model_file, params_file, config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -82,7 +109,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./faster_rcnn_r50_vd_fpn_2x_coco ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu."
+                 "with gpu; 2: run with xpu."
              << std::endl;
    return -1;
  }
@@ -91,6 +118,8 @@ int main(int argc, char* argv[]) {
    CpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 1) {
    GpuInfer(argv[1], argv[2]);
+  } else if (std::atoi(argv[3]) == 2) {
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_mask_rcnn.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_mask_rcnn.cc
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs MaskRCNN detection on KunlunXin XPU and writes the visualized result
+// to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  // NOTE(review): arguments are presumably (device_id, l3_workspace_size,
+  // locked, autotune) — confirm against RuntimeOption::UseXpu.
+  option.UseXpu(0, 0, false, false);
+  auto model = fastdeploy::vision::detection::MaskRCNN(model_file, params_file,
+                                                       config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -82,7 +109,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./mask_rcnn_r50_1x_coco/ ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu."
+                 "with gpu; 2: run with xpu."
              << std::endl;
    return -1;
  }
@@ -92,6 +119,8 @@ int main(int argc, char* argv[]) {
  } else if (std::atoi(argv[3]) == 1) {
    GpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 2) {
+    XpuInfer(argv[1], argv[2]);
+  } else if (std::atoi(argv[3]) == 3) {
    std::cout
        << "Backend::TRT has not been supported yet, will skip this inference."
        << std::endl;
--- a/examples/vision/detection/paddledetection/cpp/infer_picodet.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_picodet.cc
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs PicoDet detection on KunlunXin XPU and writes the visualized result
+// to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::detection::PicoDet(model_file, params_file,
+                                                      config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -111,7 +138,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./picodet_model_dir ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu; 2: run with gpu and use tensorrt backend."
+                 "with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
              << std::endl;
    return -1;
  }
@@ -122,6 +149,8 @@ int main(int argc, char* argv[]) {
    GpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 2) {
    TrtInfer(argv[1], argv[2]);
+  }  else if (std::atoi(argv[3]) == 3) {
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_ppyolo.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_ppyolo.cc
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs PPYOLO detection on KunlunXin XPU and writes the visualized result
+// to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::detection::PPYOLO(model_file, params_file,
+                                                     config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -82,7 +109,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu."
+                 "with gpu; 2: run with xpu."
              << std::endl;
    return -1;
  }
@@ -91,6 +118,8 @@ int main(int argc, char* argv[]) {
    CpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 1) {
    GpuInfer(argv[1], argv[2]);
+  } else if (std::atoi(argv[3]) == 2) {
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_ppyoloe.cc
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs PPYOLOE detection on KunlunXin XPU and writes the visualized result
+// to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+                                                      config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -111,7 +138,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu; 2: run with gpu and use tensorrt backend."
+                 "with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
              << std::endl;
    return -1;
  }
@@ -122,6 +149,8 @@ int main(int argc, char* argv[]) {
    GpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 2) {
    TrtInfer(argv[1], argv[2]);
+  } else if (std::atoi(argv[3]) == 3) {
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_rtmdet.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_rtmdet.cc
@@ -48,6 +48,34 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs RTMDet detection on KunlunXin XPU and writes the visualized result
+// to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::detection::RTMDet(model_file, params_file,
+                                                     config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+  // Predict receives the image by pointer, so presumably it may alter `im`;
+  // the visualization below is drawn on this separate copy.
+  auto im_bak = im.clone();
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(&im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im_bak, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -113,7 +141,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu."
+                 "with gpu, 2: run with gpu and use tensorrt backend; 3: run with xpu."
              << std::endl;
    return -1;
  }
@@ -122,8 +150,10 @@ int main(int argc, char* argv[]) {
    CpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 1) {
    GpuInfer(argv[1], argv[2]);
-  } else if(std::atoi(argv[3]) == 2){
+  } else if(std::atoi(argv[3]) == 2) {
    TrtInfer(argv[1], argv[2]);
+  } else if(std::atoi(argv[3]) == 3) {
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_ssd.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_ssd.cc
@@ -48,6 +48,34 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs SSD detection on KunlunXin XPU and writes the visualized result
+// to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::detection::SSD(model_file, params_file,
+                                                  config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -83,7 +111,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./ssd_dirname ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu."
+                 "with gpu; 2: run with xpu."
              << std::endl;
    return -1;
  }
@@ -92,6 +120,8 @@ int main(int argc, char* argv[]) {
    CpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 1) {
    GpuInfer(argv[1], argv[2]);
+  } else if (std::atoi(argv[3]) == 2) {
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_yolov3.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_yolov3.cc
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs YOLOv3 detection on KunlunXin XPU and writes the visualized result
+// to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::detection::YOLOv3(model_file, params_file,
+                                                     config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -82,7 +109,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu."
+                 "with gpu; 2: run with xpu."
              << std::endl;
    return -1;
  }
@@ -91,6 +118,8 @@ int main(int argc, char* argv[]) {
    CpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 1) {
    GpuInfer(argv[1], argv[2]);
+  } else if (std::atoi(argv[3]) == 2) {
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_yolov5.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_yolov5.cc
@@ -48,6 +48,35 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs PaddleYOLOv5 detection on KunlunXin XPU and writes the visualized
+// result to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::detection::PaddleYOLOv5(
+      model_file, params_file, config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+  // Predict receives the image by pointer, so presumably it may alter `im`;
+  // the visualization below is drawn on this separate copy.
+  auto im_bak = im.clone();
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(&im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im_bak, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -113,7 +142,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu."
+                 "with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
              << std::endl;
    return -1;
  }
@@ -124,6 +153,8 @@ int main(int argc, char* argv[]) {
    GpuInfer(argv[1], argv[2]);
  } else if(std::atoi(argv[3]) == 2){
    TrtInfer(argv[1], argv[2]);
+  } else if(std::atoi(argv[3]) == 3){
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_yolov6.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_yolov6.cc
@@ -48,6 +48,34 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs PaddleYOLOv6 detection on KunlunXin XPU and writes the visualized
+// result to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::detection::PaddleYOLOv6(
+      model_file, params_file, config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+  // Predict receives the image by pointer, so presumably it may alter `im`;
+  // the visualization below is drawn on this separate copy.
+  auto im_bak = im.clone();
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(&im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im_bak, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -113,7 +141,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu."
+                 "with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
              << std::endl;
    return -1;
  }
@@ -124,6 +152,8 @@ int main(int argc, char* argv[]) {
    GpuInfer(argv[1], argv[2]);
  } else if(std::atoi(argv[3]) == 2){
    TrtInfer(argv[1], argv[2]);
+  } else if(std::atoi(argv[3]) == 3){
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_yolov7.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_yolov7.cc
@@ -48,6 +48,34 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs PaddleYOLOv7 detection on KunlunXin XPU and writes the visualized
+// result to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::detection::PaddleYOLOv7(
+      model_file, params_file, config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+  // Predict receives the image by pointer, so presumably it may alter `im`;
+  // the visualization below is drawn on this separate copy.
+  auto im_bak = im.clone();
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(&im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im_bak, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -112,7 +140,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./ppyolo_dirname ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu."
+                 "with gpu; 2: run with gpu and use tensorrt backend; 3: run with xpu."
              << std::endl;
    return -1;
  }
@@ -123,6 +151,8 @@ int main(int argc, char* argv[]) {
    GpuInfer(argv[1], argv[2]);
  } else if(std::atoi(argv[3]) == 2){
    TrtInfer(argv[1], argv[2]);
+  } else if(std::atoi(argv[3]) == 3){
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/cpp/infer_yolox.cc
+++ b/examples/vision/detection/paddledetection/cpp/infer_yolox.cc
@@ -47,6 +47,33 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
 }

+// Runs PaddleYOLOX detection on KunlunXin XPU and writes the visualized
+// result to ./vis_result.jpg.
+//
+// model_dir:  directory containing model.pdmodel, model.pdiparams and
+//             infer_cfg.yml.
+// image_file: path of the image to run detection on.
+//
+// Failures are reported on stderr and the function returns early.
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::detection::PaddleYOLOX(
+      model_file, params_file, config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto im = cv::imread(image_file);
+  // cv::imread returns an empty Mat (it does not throw) when the file is
+  // missing or cannot be decoded; stop here instead of passing it to Predict.
+  if (im.empty()) {
+    std::cerr << "Failed to read image: " << image_file << std::endl;
+    return;
+  }
+
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
 void GpuInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "model.pdmodel";
  auto params_file = model_dir + sep + "model.pdiparams";
@@ -111,7 +138,7 @@ int main(int argc, char* argv[]) {
           "e.g ./infer_model ./paddle_yolox_dirname ./test.jpeg 0"
        << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu; 2: run with gpu by tensorrt."
+                 "with gpu; 2: run with gpu by tensorrt; 3: run with xpu."
              << std::endl;
    return -1;
  }
@@ -122,6 +149,8 @@ int main(int argc, char* argv[]) {
    GpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 2) {
    TrtInfer(argv[1], argv[2]);
+  } else if (std::atoi(argv[3]) == 3) {
+    XpuInfer(argv[1], argv[2]);
  }
  return 0;
 }
--- a/examples/vision/detection/paddledetection/python/README.md
+++ b/examples/vision/detection/paddledetection/python/README.md
@@ -23,6 +23,8 @@ python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439
 python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device gpu
 # GPU上使用TensorRT推理 （注意：TensorRT推理第一次运行，有序列化模型的操作，有一定耗时，需要耐心等待）
 python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device gpu --use_trt True
+# 昆仑芯XPU推理
+python infer_ppyoloe.py --model_dir ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device xpu
 ```

 运行完成可视化结果如下图所示
--- a/examples/vision/detection/paddledetection/python/infer_faster_rcnn.py
+++ b/examples/vision/detection/paddledetection/python/infer_faster_rcnn.py
@@ -17,7 +17,7 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -29,6 +29,9 @@ def parse_arguments():
 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu(autotune=False, l3_workspace_size=0)
+
    if args.device.lower() == "gpu":
        option.use_gpu()

--- a/examples/vision/detection/paddledetection/python/infer_mask_rcnn.py
+++ b/examples/vision/detection/paddledetection/python/infer_mask_rcnn.py
@@ -17,7 +17,7 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -29,6 +29,9 @@ def parse_arguments():
 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu(autotune=False, l3_workspace_size=0)
+
    if args.device.lower() == "gpu":
        # option.use_gpu()
        print(
--- a/examples/vision/detection/paddledetection/python/infer_picodet.py
+++ b/examples/vision/detection/paddledetection/python/infer_picodet.py
@@ -17,7 +17,7 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -29,6 +29,9 @@ def parse_arguments():
 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu()
+
    if args.device.lower() == "gpu":
        option.use_gpu()

--- a/examples/vision/detection/paddledetection/python/infer_ppyolo.py
+++ b/examples/vision/detection/paddledetection/python/infer_ppyolo.py
@@ -17,7 +17,7 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -29,6 +29,9 @@ def parse_arguments():
 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu()
+
    if args.device.lower() == "gpu":
        option.use_gpu()

--- a/examples/vision/detection/paddledetection/python/infer_ppyoloe.py
+++ b/examples/vision/detection/paddledetection/python/infer_ppyoloe.py
@@ -18,7 +18,7 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -30,6 +30,9 @@ def parse_arguments():
 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu()
+
    if args.device.lower() == "gpu":
        option.use_gpu()

--- a/examples/vision/detection/paddledetection/python/infer_rtmdet.py
+++ b/examples/vision/detection/paddledetection/python/infer_rtmdet.py
@@ -17,7 +17,7 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -29,6 +29,9 @@ def parse_arguments():
 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu()
+
    if args.device.lower() == "gpu":
        option.use_gpu()

--- a/examples/vision/detection/paddledetection/python/infer_ssd.py
+++ b/examples/vision/detection/paddledetection/python/infer_ssd.py
@@ -17,12 +17,15 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    return parser.parse_args()


 def build_option(args):
    option = fd.RuntimeOption()
+    if args.device.lower() == "xpu":
+        option.use_xpu()
+
    if args.device.lower() == "gpu":
        option.use_gpu()
    return option
@@ -36,8 +39,10 @@ config_file = os.path.join(args.model_dir, "infer_cfg.yml")

 # 配置runtime，加载模型
 runtime_option = build_option(args)
-model = fd.vision.detection.SSD(
-    model_file, params_file, config_file, runtime_option=runtime_option)
+model = fd.vision.detection.SSD(model_file,
+                                params_file,
+                                config_file,
+                                runtime_option=runtime_option)

 # 预测图片检测结果
 im = cv2.imread(args.image)
--- a/examples/vision/detection/paddledetection/python/infer_yolov3.py
+++ b/examples/vision/detection/paddledetection/python/infer_yolov3.py
@@ -17,7 +17,7 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -29,6 +29,9 @@ def parse_arguments():
 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu()
+
    if args.device.lower() == "gpu":
        option.use_gpu()

--- a/examples/vision/detection/paddledetection/python/infer_yolov5.py
+++ b/examples/vision/detection/paddledetection/python/infer_yolov5.py
@@ -17,7 +17,7 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -29,6 +29,9 @@ def parse_arguments():
 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu()
+
    if args.device.lower() == "gpu":
        option.use_gpu()

--- a/examples/vision/detection/paddledetection/python/infer_yolov6.py
+++ b/examples/vision/detection/paddledetection/python/infer_yolov6.py
@@ -17,7 +17,7 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -29,6 +29,9 @@ def parse_arguments():
 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu()
+
    if args.device.lower() == "gpu":
        option.use_gpu()

--- a/examples/vision/detection/paddledetection/python/infer_yolov7.py
+++ b/examples/vision/detection/paddledetection/python/infer_yolov7.py
@@ -17,18 +17,21 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
        default=False,
-        help="Wether to use tensorrt.")    
+        help="Wether to use tensorrt.")
    return parser.parse_args()


 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu()
+
    if args.device.lower() == "gpu":
        option.use_gpu()

--- a/examples/vision/detection/paddledetection/python/infer_yolox.py
+++ b/examples/vision/detection/paddledetection/python/infer_yolox.py
@@ -17,7 +17,7 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'xpu', 'cpu' or 'gpu'.")
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -29,6 +29,9 @@ def parse_arguments():
 def build_option(args):
    option = fd.RuntimeOption()

+    if args.device.lower() == "xpu":
+        option.use_xpu()
+
    if args.device.lower() == "gpu":
        option.use_gpu()

--- a/examples/vision/detection/yolov6/cpp/CMakeLists.txt
+++ b/examples/vision/detection/yolov6/cpp/CMakeLists.txt
@@ -12,3 +12,7 @@ include_directories(${FASTDEPLOY_INCS})
 add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
 # 添加FastDeploy库依赖
 target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+
+add_executable(infer_paddle_demo ${PROJECT_SOURCE_DIR}/infer_paddle_model.cc)
+# 添加FastDeploy库依赖
+target_link_libraries(infer_paddle_demo ${FASTDEPLOY_LIBS})
--- a/examples/vision/detection/yolov6/cpp/README.md
+++ b/examples/vision/detection/yolov6/cpp/README.md
@@ -18,10 +18,24 @@ tar xvf fastdeploy-linux-x64-x.x.x.tgz
 cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
 make -j

-#下载官方转换好的YOLOv6模型文件和测试图片
-wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s.onnx
+#下载Paddle模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_infer.tar
+tar -xf yolov6s_infer.tar
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg

+# CPU推理
+./infer_paddle_demo ./yolov6s_infer 000000014439.jpg 0
+# GPU推理
+./infer_paddle_demo ./yolov6s_infer 000000014439.jpg 1
+# XPU推理
+./infer_paddle_demo ./yolov6s_infer 000000014439.jpg 2
+```
+
+如果想要验证ONNX模型的推理，可以参考如下命令：
+```bash
+#下载官方转换好的YOLOv6 ONNX模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s.onnx
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg

 # CPU推理
 ./infer_demo yolov6s.onnx 000000014439.jpg 0
--- a/examples/vision/detection/yolov6/cpp/infer_paddle_model.cc
+++ b/examples/vision/detection/yolov6/cpp/infer_paddle_model.cc
@@ -0,0 +1,119 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void CpuInfer(const std::string& model_dir, const std::string& image_file) {
+  // Detect on CPU with the ORT backend and save the rendered result.
+  fastdeploy::RuntimeOption runtime_option;
+  runtime_option.UseCpu();
+  runtime_option.UseOrtBackend();
+  const std::string model_path = model_dir + sep + "model.pdmodel";
+  const std::string params_path = model_dir + sep + "model.pdiparams";
+  auto detector = fastdeploy::vision::detection::YOLOv6(
+      model_path, params_path, runtime_option, fastdeploy::ModelFormat::PADDLE);
+  if (!detector.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto image = cv::imread(image_file);
+  fastdeploy::vision::DetectionResult detections;
+  if (!detector.Predict(&image, &detections)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+  std::cout << detections.Str() << std::endl;
+  auto rendered = fastdeploy::vision::VisDetection(image, detections);
+  cv::imwrite("vis_result.jpg", rendered);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  // Detect on the XPU device (UseXpu) and save the rendered result.
+  fastdeploy::RuntimeOption runtime_option;
+  runtime_option.UseXpu();
+  const std::string model_path = model_dir + sep + "model.pdmodel";
+  const std::string params_path = model_dir + sep + "model.pdiparams";
+  auto detector = fastdeploy::vision::detection::YOLOv6(
+      model_path, params_path, runtime_option, fastdeploy::ModelFormat::PADDLE);
+  if (!detector.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto image = cv::imread(image_file);
+  fastdeploy::vision::DetectionResult detections;
+  if (!detector.Predict(&image, &detections)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+  std::cout << detections.Str() << std::endl;
+  auto rendered = fastdeploy::vision::VisDetection(image, detections);
+  cv::imwrite("vis_result.jpg", rendered);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+void GpuInfer(const std::string& model_dir, const std::string& image_file) {
+  // Detect on GPU with the TensorRT backend and save the rendered result.
+  fastdeploy::RuntimeOption runtime_option;
+  runtime_option.UseGpu();
+  runtime_option.UseTrtBackend();
+  const std::string model_path = model_dir + sep + "model.pdmodel";
+  const std::string params_path = model_dir + sep + "model.pdiparams";
+  auto detector = fastdeploy::vision::detection::YOLOv6(
+      model_path, params_path, runtime_option, fastdeploy::ModelFormat::PADDLE);
+  if (!detector.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+  auto image = cv::imread(image_file);
+  fastdeploy::vision::DetectionResult detections;
+  if (!detector.Predict(&image, &detections)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+  std::cout << detections.Str() << std::endl;
+  auto rendered = fastdeploy::vision::VisDetection(image, detections);
+  cv::imwrite("vis_result.jpg", rendered);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+
+int main(int argc, char* argv[]) {
+  if (argc < 4) {  // need the model directory, the image and run_option
+    std::cout << "Usage: infer_paddle_demo path/to/model path/to/image "
+                 "run_option, e.g ./infer_paddle_demo ./yolov6s_infer "
+                 "./test.jpeg 0\n"
+                 "The data type of run_option is int, 0: run with cpu; 1: run "
+                 "with gpu; 2: run with xpu."
+              << std::endl;
+    return -1;
+  }
+  switch (std::atoi(argv[3])) {  // unknown values are rejected, not ignored
+    case 0: CpuInfer(argv[1], argv[2]); break;
+    case 1: GpuInfer(argv[1], argv[2]); break;
+    case 2: XpuInfer(argv[1], argv[2]); break;
+    default:
+      std::cerr << "Unsupported run_option: " << argv[3] << std::endl;
+      return -1;
+  }
+  return 0;
+}
--- a/examples/vision/detection/yolov6/python/README.md
+++ b/examples/vision/detection/yolov6/python/README.md
@@ -12,7 +12,19 @@
 git clone https://github.com/PaddlePaddle/FastDeploy.git
 cd examples/vision/detection/yolov6/python/

+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s_infer.tar
+tar -xf yolov6s_infer.tar
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg

+# CPU推理
+python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg  --device cpu
+# GPU推理
+python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg  --device gpu
+# XPU推理
+python infer_paddle_model.py --model yolov6s_infer --image 000000014439.jpg  --device xpu
+```
+如果想要验证ONNX模型的推理，可以参考如下命令：
+```bash
 #下载YOLOv6模型文件和测试图片
 wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov6s.onnx
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
--- a/examples/vision/detection/yolov6/python/infer_paddle_model.py
+++ b/examples/vision/detection/yolov6/python/infer_paddle_model.py
@@ -0,0 +1,54 @@
+import fastdeploy as fd
+import cv2
+import os
+from fastdeploy import ModelFormat
+
+
+def parse_arguments():
+    """Parse the command-line options of the YOLOv6 Paddle-model demo."""
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model", required=True, help="Path of yolov6 paddle model.")
+    parser.add_argument(
+        "--image", required=True, help="Path of test image file.")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Type of inference device, support 'cpu', 'xpu' or 'gpu'.")
+    return parser.parse_args()
+
+
+def build_option(args):
+    """Return a RuntimeOption set up for the device named in ``args.device``."""
+    runtime_option = fd.RuntimeOption()
+    device = args.device.lower()
+    if device == "gpu":
+        runtime_option.use_gpu(0)
+    elif device == "xpu":
+        runtime_option.use_xpu()
+    return runtime_option
+
+
+args = parse_arguments()
+
+# Locate the Paddle model and parameter files inside the model directory.
+model_path = os.path.join(args.model, "model.pdmodel")
+params_path = os.path.join(args.model, "model.pdiparams")
+# Configure the runtime for the requested device, then load the model.
+detector = fd.vision.detection.YOLOv6(
+    model_path,
+    params_path,
+    runtime_option=build_option(args),
+    model_format=ModelFormat.PADDLE)
+
+# Run detection on the test image.
+image = cv2.imread(args.image)
+detections = detector.predict(image)
+print(detections)
+
+# Draw the detections on the image and save the visualization.
+rendered = fd.vision.vis_detection(image, detections)
+cv2.imwrite("visualized_result.jpg", rendered)
+print("Visualized result save in ./visualized_result.jpg")
--- a/examples/vision/detection/yolov7/cpp/CMakeLists.txt
+++ b/examples/vision/detection/yolov7/cpp/CMakeLists.txt
@@ -12,3 +12,7 @@ include_directories(${FASTDEPLOY_INCS})
 add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
 # 添加FastDeploy库依赖
 target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+
+add_executable(infer_paddle_model_demo ${PROJECT_SOURCE_DIR}/infer_paddle_model.cc)
+# 添加FastDeploy库依赖
+target_link_libraries(infer_paddle_model_demo ${FASTDEPLOY_LIBS})
--- a/examples/vision/detection/yolov7/cpp/README.md
+++ b/examples/vision/detection/yolov7/cpp/README.md
@@ -18,7 +18,20 @@ tar xvf fastdeploy-linux-x64-x.x.x.tgz
 cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
 make -j

-#下载官方转换好的yolov7模型文件和测试图片
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_infer.tar
+tar -xf yolov7_infer.tar
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+# CPU推理
+./infer_paddle_model_demo yolov7_infer 000000014439.jpg 0
+# GPU推理
+./infer_paddle_model_demo yolov7_infer 000000014439.jpg 1
+# XPU推理
+./infer_paddle_model_demo yolov7_infer 000000014439.jpg 2
+```
+如果想要验证ONNX模型的推理，可以参考如下命令：
+```bash
+#下载官方转换好的yolov7 ONNX模型文件和测试图片
 wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg

--- a/examples/vision/detection/yolov7/cpp/infer_paddle_model.cc
+++ b/examples/vision/detection/yolov7/cpp/infer_paddle_model.cc
@@ -0,0 +1,77 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+// Load the Paddle-format YOLOv7 model, run it on one image, save the result.
+void InitAndInfer(const std::string& model_dir, const std::string& image_file,
+                  const fastdeploy::RuntimeOption& option) {
+  auto model_file = model_dir + sep + "model.pdmodel";
+  auto params_file = model_dir + sep + "model.pdiparams";
+  auto model = fastdeploy::vision::detection::YOLOv7(
+      model_file, params_file, option, fastdeploy::ModelFormat::PADDLE);
+  // Checked at runtime: assert() is compiled out when NDEBUG is defined.
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+  auto im = cv::imread(image_file);
+  fastdeploy::vision::DetectionResult res;
+  if (!model.Predict(&im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisDetection(im, res);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+// Usage: infer_paddle_model_demo <model_dir> <image> <run_option>.
+int main(int argc, char* argv[]) {
+  if (argc < 4) {
+    std::cout << "Usage: infer_paddle_model_demo path/to/paddle_model "
+                 "path/to/image run_option, "
+                 "e.g ./infer_paddle_model_demo ./yolov7_infer "
+                 "./000000014439.jpg 0"
+              << std::endl;
+    std::cout << "The data type of run_option is int, 0: run on cpu with ORT "
+                 "backend; 1: run "
+                 "on gpu with TensorRT backend ; 2: run with xpu. "
+              << std::endl;
+    return -1;
+  }
+  fastdeploy::RuntimeOption option;
+  int flag = std::atoi(argv[3]);
+  // Any other run_option value leaves the default-constructed option as is.
+  if (flag == 0) {
+    option.UseCpu();
+    option.UseOrtBackend();
+  } else if (flag == 1) {
+    option.UseGpu();
+    option.UseTrtBackend();
+  } else if (flag == 2) {
+    option.UseXpu();
+  }
+
+  std::string model_dir = argv[1];
+  std::string test_image = argv[2];
+  InitAndInfer(model_dir, test_image, option);
+  return 0;
+}
--- a/examples/vision/detection/yolov7/python/README.md
+++ b/examples/vision/detection/yolov7/python/README.md
@@ -14,6 +14,19 @@
 git clone https://github.com/PaddlePaddle/FastDeploy.git
 cd examples/vision/detection/yolov7/python/

+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_infer.tar
+tar -xf yolov7_infer.tar
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+# CPU推理
+python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device cpu
+# GPU推理
+python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device gpu
+# XPU推理
+python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device xpu
+```
+如果想要验证ONNX模型的推理，可以参考如下命令：
+```bash
 #下载yolov7模型文件和测试图片
 wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
--- a/examples/vision/detection/yolov7/python/README_EN.md
+++ b/examples/vision/detection/yolov7/python/README_EN.md
@@ -14,7 +14,19 @@ This doc provides a quick `infer.py` demo of YOLOv7 deployment on CPU/GPU, and a
 # Download sample deployment code
 git clone https://github.com/PaddlePaddle/FastDeploy.git
 cd examples/vision/detection/yolov7/python/
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_infer.tar
+tar -xf yolov7_infer.tar
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg

+# CPU
+python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device cpu
+# GPU
+python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device gpu
+# XPU
+python infer_paddle_model.py --model yolov7_infer --image 000000014439.jpg --device xpu
+```
+If you want to test ONNX model:
+```bash
 # Download yolov7 model files and test images
 wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov7.onnx
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
@@ -23,7 +35,7 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
 python infer.py --model yolov7.onnx --image 000000014439.jpg --device cpu
 # GPU
 python infer.py --model yolov7.onnx --image 000000014439.jpg --device gpu
-# GPU上使用TensorRT推理
+# Infer with TensorRT on GPU
 python infer.py --model yolov7.onnx --image 000000014439.jpg --device gpu --use_trt True
 ```

--- a/examples/vision/detection/yolov7/python/infer_paddle_model.py
+++ b/examples/vision/detection/yolov7/python/infer_paddle_model.py
@@ -0,0 +1,54 @@
+import fastdeploy as fd
+import cv2
+import os
+from fastdeploy import ModelFormat
+
+
+def parse_arguments():
+    """Parse the command-line options of the YOLOv7 Paddle-model demo."""
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model", required=True, help="Path of yolov7 paddle model.")
+    parser.add_argument(
+        "--image", required=True, help="Path of test image file.")
+    parser.add_argument(
+        "--device",
+        type=str,
+        default='cpu',
+        help="Type of inference device, support 'cpu', 'xpu' or 'gpu'.")
+    return parser.parse_args()
+
+
+def build_option(args):
+    """Return a RuntimeOption set up for the device named in ``args.device``."""
+    runtime_option = fd.RuntimeOption()
+    device = args.device.lower()
+    if device == "gpu":
+        runtime_option.use_gpu(0)
+    elif device == "xpu":
+        runtime_option.use_xpu()
+    return runtime_option
+
+
+args = parse_arguments()
+
+# Locate the Paddle model and parameter files inside the model directory.
+model_path = os.path.join(args.model, "model.pdmodel")
+params_path = os.path.join(args.model, "model.pdiparams")
+# Configure the runtime for the requested device, then load the model.
+detector = fd.vision.detection.YOLOv7(
+    model_path,
+    params_path,
+    runtime_option=build_option(args),
+    model_format=ModelFormat.PADDLE)
+
+# Run detection on the test image.
+image = cv2.imread(args.image)
+detections = detector.predict(image)
+print(detections)
+
+# Draw the detections on the image and save the visualization.
+rendered = fd.vision.vis_detection(image, detections)
+cv2.imwrite("visualized_result.jpg", rendered)
+print("Visualized result save in ./visualized_result.jpg")
--- a/fastdeploy/vision/detection/contrib/yolov6.cc
+++ b/fastdeploy/vision/detection/contrib/yolov6.cc
@@ -72,6 +72,7 @@ YOLOv6::YOLOv6(const std::string& model_file, const std::string& params_file,
  } else {
    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
+    valid_xpu_backends = {Backend::LITE};
    valid_ascend_backends = {Backend::LITE};
  }
  runtime_option = custom_option;
@@ -339,4 +340,4 @@ bool YOLOv6::Predict(cv::Mat* im, DetectionResult* result, float conf_threshold,

 }  // namespace detection
 }  // namespace vision
-}  // namespace fastdeploy
+}  // namespace fastdeploy
--- a/fastdeploy/vision/detection/contrib/yolov7/yolov7.cc
+++ b/fastdeploy/vision/detection/contrib/yolov7/yolov7.cc
@@ -27,6 +27,7 @@ YOLOv7::YOLOv7(const std::string& model_file, const std::string& params_file,
  } else {
    valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
    valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
+    valid_xpu_backends = {Backend::LITE};
    valid_ascend_backends = {Backend::LITE};
  }
  runtime_option = custom_option;
--- a/fastdeploy/vision/detection/ppdet/model.h
+++ b/fastdeploy/vision/detection/ppdet/model.h
@@ -39,6 +39,7 @@ class FASTDEPLOY_DECL PicoDet : public PPDetBase {
                        Backend::PDINFER, Backend::LITE};
    valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
    valid_rknpu_backends = {Backend::RKNPU2};
+    valid_xpu_backends = {Backend::LITE};
    valid_ascend_backends = {Backend::LITE};
    initialized = Initialize();
  }
@@ -66,6 +67,7 @@ class FASTDEPLOY_DECL PPYOLOE : public PPDetBase {
                        Backend::PDINFER, Backend::LITE};
    valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
    valid_timvx_backends = {Backend::LITE};
+    valid_xpu_backends = {Backend::LITE};
    valid_ascend_backends = {Backend::LITE};
    initialized = Initialize();
  }
@@ -91,6 +93,7 @@ class FASTDEPLOY_DECL PPYOLO : public PPDetBase {
                model_format) {
    valid_cpu_backends = {Backend::PDINFER, Backend::LITE};
    valid_gpu_backends = {Backend::PDINFER};
+    valid_xpu_backends = {Backend::LITE};
    valid_ascend_backends = {Backend::LITE};
    initialized = Initialize();
  }
@@ -109,6 +112,7 @@ class FASTDEPLOY_DECL YOLOv3 : public PPDetBase {
    valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER,
                        Backend::LITE};
    valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
+    valid_xpu_backends = {Backend::LITE};
    valid_ascend_backends = {Backend::LITE};
    initialized = Initialize();
  }
@@ -127,6 +131,7 @@ class FASTDEPLOY_DECL PaddleYOLOX : public PPDetBase {
    valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER,
                        Backend::LITE};
    valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
+    valid_xpu_backends = {Backend::LITE};
    valid_ascend_backends = {Backend::LITE};
    initialized = Initialize();
  }
@@ -144,6 +149,7 @@ class FASTDEPLOY_DECL FasterRCNN : public PPDetBase {
                model_format) {
    valid_cpu_backends = {Backend::PDINFER, Backend::LITE};
    valid_gpu_backends = {Backend::PDINFER};
+    valid_xpu_backends = {Backend::LITE};
    initialized = Initialize();
  }

@@ -160,6 +166,7 @@ class FASTDEPLOY_DECL MaskRCNN : public PPDetBase {
                model_format) {
    valid_cpu_backends = {Backend::PDINFER, Backend::LITE};
    valid_gpu_backends = {Backend::PDINFER};
+    valid_xpu_backends = {Backend::LITE};
    initialized = Initialize();
  }

@@ -176,6 +183,7 @@ class FASTDEPLOY_DECL SSD : public PPDetBase {
                model_format) {
    valid_cpu_backends = {Backend::PDINFER, Backend::LITE};
    valid_gpu_backends = {Backend::PDINFER};
+    valid_xpu_backends = {Backend::LITE};
    valid_ascend_backends = {Backend::LITE};
    initialized = Initialize();
  }
@@ -193,6 +201,7 @@ class FASTDEPLOY_DECL PaddleYOLOv5 : public PPDetBase {
                model_format) {
    valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
    valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
+    valid_xpu_backends = {Backend::LITE};
    initialized = Initialize();
  }

@@ -209,6 +218,7 @@ class FASTDEPLOY_DECL PaddleYOLOv6 : public PPDetBase {
                model_format) {
    valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER};
    valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
+    valid_xpu_backends = {Backend::LITE};
    initialized = Initialize();
  }

@@ -225,6 +235,7 @@ class FASTDEPLOY_DECL PaddleYOLOv7 : public PPDetBase {
                model_format) {
    valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
    valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
+    valid_xpu_backends = {Backend::LITE};
    initialized = Initialize();
  }

@@ -241,6 +252,7 @@ class FASTDEPLOY_DECL RTMDet : public PPDetBase {
                model_format) {
    valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER};
    valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
+    valid_xpu_backends = {Backend::LITE};
    initialized = Initialize();
  }