[Tutorials] Add tutorials for intel gpu (#860)

* Add tutorials for intel gpu

* fix gflags dependency

* Update README_CN.md

* Update README.md

* Update README.md
Jason
2022-12-13 10:21:56 +08:00
committed by GitHub
parent 5fc6cf30df
commit 534d5b8c8b
20 changed files with 650 additions and 39 deletions

View File

@@ -107,11 +107,16 @@ ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
ADD_DEPENDENCIES(gflags extern_gflags)
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
list(APPEND GFLAGS_LIBRARIES pthread)
endif()
# On Windows (including MinGW), the Shlwapi library is used by gflags if available.
if (WIN32)
include(CheckIncludeFileCXX)
check_include_file_cxx("shlwapi.h" HAVE_SHLWAPI)
if (HAVE_SHLWAPI)
set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib)
list(APPEND GFLAGS_LIBRARIES shlwapi.lib)
endif(HAVE_SHLWAPI)
endif (WIN32)

View File

@@ -48,18 +48,7 @@ function(add_fastdeploy_executable FIELD CC_FILE)
if(EXISTS ${TEMP_TARGET_FILE} AND TARGET fastdeploy)
add_executable(${TEMP_TARGET_NAME} ${TEMP_TARGET_FILE})
target_link_libraries(${TEMP_TARGET_NAME} PUBLIC fastdeploy)
if(TARGET gflags)
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(${TEMP_TARGET_NAME} PRIVATE gflags pthread)
elseif(WIN32)
target_link_libraries(${TEMP_TARGET_NAME} PRIVATE gflags)
if (HAVE_SHLWAPI)
target_link_libraries(${TEMP_TARGET_NAME} PRIVATE shlwapi.lib)
endif()
else()
target_link_libraries(${TEMP_TARGET_NAME} PRIVATE gflags)
endif()
endif()
target_link_libraries(${TEMP_TARGET_NAME} PRIVATE ${GFLAGS_LIBRARIES})
config_fastdeploy_executable_link_flags(${TEMP_TARGET_NAME})
math(EXPR _EXAMPLES_NUM "${EXAMPLES_NUM} + 1")
set(EXAMPLES_NUM ${_EXAMPLES_NUM} PARENT_SCOPE)

View File

@@ -10,9 +10,5 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
# Add FastDeploy library dependency
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags pthread)
else()
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags)
endif()
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} ${GFLAGS_LIBRARIES})

View File

@@ -7,7 +7,7 @@
- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the prebuilt deployment library and sample code according to your development environment. Refer to [FastDeploy Prebuilt Libraries](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
Taking CPU inference on Linux as an example, run the following commands in this directory to compile and test. To support this model, FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required
Taking CPU inference on Linux as an example, run the following commands in this directory to compile and test. To support this model, FastDeploy version 1.0.2 or above (x.x.x>=1.0.2) is required, or use the nightly build version
```bash
mkdir build

View File

@@ -10,9 +10,4 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
# Add FastDeploy library dependency
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags pthread)
else()
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags)
endif()
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} ${GFLAGS_LIBRARIES})

View File

@@ -7,7 +7,7 @@
- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the prebuilt deployment library and sample code according to your development environment. Refer to [FastDeploy Prebuilt Libraries](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
Taking CPU inference on Linux as an example, run the following commands in this directory to compile and test. To support this model, FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required
Taking CPU inference on Linux as an example, run the following commands in this directory to compile and test. To support this model, FastDeploy version 1.0.2 or above (x.x.x>=1.0.2) is required, or use the nightly build version
```bash
mkdir build

View File

@@ -10,9 +10,4 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
# Add FastDeploy library dependency
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags pthread)
else()
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags)
endif()
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} ${GFLAGS_LIBRARIES})

View File

@@ -10,9 +10,4 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
# Add FastDeploy library dependency
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags pthread)
else()
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} gflags)
endif()
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS} ${GFLAGS_LIBRARIES})

View File

@@ -7,7 +7,7 @@
- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the prebuilt deployment library and sample code according to your development environment. Refer to [FastDeploy Prebuilt Libraries](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
Taking CPU inference on Linux as an example, run the following commands in this directory to compile and test. To support this model, FastDeploy version 0.7.0 or above (x.x.x>=0.7.0) is required
Taking CPU inference on Linux as an example, run the following commands in this directory to compile and test. To support this model, FastDeploy version 1.0.2 or above (x.x.x>=1.0.2) is required, or use the nightly build version
```bash
mkdir build

View File

@@ -0,0 +1,57 @@
English | [中文](README_CN.md)
# Deploy on Intel GPU
Intel GPU is supported through the OpenVINO backend in FastDeploy. There are two points to note when running inference on an Intel GPU:
- The model's input shapes have to be fixed
- Some operators may be supported on the CPU but not on the GPU
This directory provides examples for both situations.
## Fixed input shape
Deploying a computer vision model involves 3 steps:
- Take the input image data and run the preprocessing steps to get the `tensors` that will be fed to the deep learning model
- Run the model with the Runtime on the input `tensors` and get the output `tensors`
- Postprocess the output `tensors` to get the final results we need, e.g. `DetectionResult`, `SegmentationResult`
A fixed input shape means that the shape of the `tensors` received by the runtime is the same each time and cannot change. For models such as PP-OCR and RCNN, the shape of each input to the model varies, so they are not supported on Intel GPU for now. For PaddleClas models, PP-YOLOE, PicoDet, YOLOv5, etc., the input shape after preprocessing is always the same, so they can be supported.
Also, the shape may not have been fixed when the deployment model was exported from the framework. For example, the ResNet50 model of PaddleClas always receives data of size [1, 3, 224, 224] during inference, but when the model was exported its input shape was set to [-1, 3, -1, -1], which prevents OpenVINO from determining the model's input shape.
FastDeploy provides the following interfaces to help fix the model's shape:
- Python: `RuntimeOption.set_openvino_shape_info()`
- C++: `RuntimeOption::SetOpenVINOShapeInfo()`
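As a minimal sketch (the tensor name `inputs` and shape [1, 3, 224, 224] follow the ResNet50 example under this directory; adjust them to your own model), fixing the shape in Python looks like this:
```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_openvino_backend()
option.set_openvino_device("GPU")
# Fix the input shape so OpenVINO can build the model for the Intel GPU
option.set_openvino_shape_info({"inputs": [1, 3, 224, 224]})
```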
## Operator support
A deep learning model is essentially a directed topological graph, and each node in the graph is an operator (OP). Because inference engines are implemented differently, the set of OPs supported by each backend varies. For OpenVINO, the OPs supported on the CPU and GPU differ, which means the same model may run on the CPU but not on the GPU. Taking PP-YOLOE as an example, running it directly on the GPU produces the following message, which means the `MulticlassNms` OP is not supported by the GPU.
```
RuntimeError: Operation: multiclass_nms3_0.tmp_1 of type MulticlassNms(op::v0) is not supported
```
In this case, we can execute the model in a heterogeneous way, that is, let the unsupported OPs run on the CPU, and the remaining OPs still run on the GPU.
Heterogeneous execution is enabled through the following settings:
**Python**
```python
import fastdeploy as fd
option = fd.RuntimeOption()
option.use_openvino_backend()
option.set_openvino_device("HETERO:GPU,CPU")
option.set_openvino_cpu_operators(["MulticlassNms"])
```
**C++**
```c++
fastdeploy::RuntimeOption option;
option.UseOpenVINOBackend();
option.SetOpenVINODevice("HETERO:GPU,CPU");
option.SetOpenVINOCpuOperators({"MulticlassNms"});
```

View File

@@ -0,0 +1,54 @@
[English](README.md) | 中文
# Using Intel GPU (discrete / integrated graphics)
FastDeploy supports Intel GPUs through the OpenVINO backend. Overall, deploying a model is similar to deploying any other model with FastDeploy, but there are 2 points to note when running inference on the GPU:
- When running inference on the GPU, OpenVINO requires the model inputs to stay fixed
- The set of OPs OpenVINO supports on the GPU differs from that on the CPU, so heterogeneous execution is needed
Currently all OPs in PaddleClas models can run on the GPU, while some models such as PP-YOLOE require heterogeneous execution. See the examples in this directory for concrete usage.
## Fixed input shape
Inference with a vision model involves 3 steps:
- The input image goes through preprocessing and finally yields the Tensors to be fed to the model Runtime
- The model Runtime takes the Tensors, runs inference, and produces the output Tensors
- The output Tensors are postprocessed into the final structured results, such as DetectionResult, SegmentationResult, etc.
A fixed input means that the Tensors received by the Runtime have the same data size every time and cannot change. In FastDeploy, models such as PP-OCR and RCNN feed inputs of constantly changing size to the model, so they are not supported for now; for PaddleClas models, PP-YOLOE, PicoDet, YOLOv5, etc., the data size after each preprocessing is the same, so they can be supported.
Also, the shape may not have been fixed when the deployment model was exported from the framework. For example, the ResNet50 model of PaddleClas always receives data of size [1, 3, 224, 224] during inference, but when the model was exported its input shape was set to [-1, 3, -1, -1], which also prevents OpenVINO from determining the model's input shape.
FastDeploy provides the following interfaces to help fix the model's shape:
- Python: `RuntimeOption.set_openvino_shape_info()`
- C++: `RuntimeOption::SetOpenVINOShapeInfo()`
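For example, a minimal sketch of fixing the shape in Python (the tensor name `inputs` and shape [1, 3, 224, 224] follow the ResNet50 example in this tutorial and should be adjusted to your own model):
```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_openvino_backend()
option.set_openvino_device("GPU")
# Pin the input shape so OpenVINO can determine it when building for the Intel GPU
option.set_openvino_shape_info({"inputs": [1, 3, 224, 224]})
```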
## Operator support
A deep learning model is essentially a directed topological graph, and each node in the graph is an operator (OP). Because inference engines are implemented differently, the set of OPs supported by each backend varies. For OpenVINO, the OPs supported on the CPU and GPU differ, which means the same model may run on the CPU with OpenVINO but not necessarily on the GPU. Taking PP-YOLOE as an example, running it directly on the GPU produces the following message, which means the `MulticlassNms` OP is not supported by the GPU.
```
RuntimeError: Operation: multiclass_nms3_0.tmp_1 of type MulticlassNms(op::v0) is not supported
```
In this case, we can execute the model heterogeneously, i.e. run the unsupported OPs on the CPU while the remaining OPs still run on the GPU.
Heterogeneous execution is enabled through the following settings:
### Python
```python
import fastdeploy as fd
option = fd.RuntimeOption()
option.use_openvino_backend()
option.set_openvino_device("HETERO:GPU,CPU")
option.set_openvino_cpu_operators(["MulticlassNms"])
```
### C++
```c++
fastdeploy::RuntimeOption option;
option.UseOpenVINOBackend();
option.SetOpenVINODevice("HETERO:GPU,CPU");
option.SetOpenVINOCpuOperators({"MulticlassNms"});
```

View File

@@ -0,0 +1,20 @@
PROJECT(infer_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
# Specify the directory where the FastDeploy SDK was extracted
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(${FASTDEPLOY_INSTALL_DIR}/utils/gflags.cmake)
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_resnet50 ${PROJECT_SOURCE_DIR}/infer_resnet50.cc)
add_executable(infer_ppyoloe ${PROJECT_SOURCE_DIR}/infer_ppyoloe.cc)
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
target_link_libraries(infer_resnet50 ${FASTDEPLOY_LIBS} gflags pthread)
target_link_libraries(infer_ppyoloe ${FASTDEPLOY_LIBS} gflags pthread)
else()
target_link_libraries(infer_resnet50 ${FASTDEPLOY_LIBS} gflags)
target_link_libraries(infer_ppyoloe ${FASTDEPLOY_LIBS} gflags)
endif()

View File

@@ -0,0 +1,52 @@
English | [中文](README_CN.md)
# C++ Example
Before deployment, confirm the following two steps
- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Download the prebuilt deployment library and sample code according to your development environment. Refer to [FastDeploy Prebuilt Libraries](../../../docs/en/build_and_install/download_prebuilt_libraries.md)
**Notice** This document requires FastDeploy version >= 1.0.2, or the nightly build version.
```bash
# Get FastDeploy codes
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/tutorials/intel_gpu/cpu
mkdir build && cd build
# Please refer to the preparation step to get the download link
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
# Download PaddleClas model and test image
wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg
tar -xvf ResNet50_vd_infer.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Inference with CPU
./infer_resnet50 -model ResNet50_vd_infer -image ILSVRC2012_val_00000010.jpeg -device cpu -topk 3
# Inference with Intel GPU
./infer_resnet50 -model ResNet50_vd_infer -image ILSVRC2012_val_00000010.jpeg -device intel_gpu -topk 3
# Download PaddleDetection/PP-YOLOE model and test image
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
tar xvf ppyoloe_crn_l_300e_coco.tgz
# Inference with CPU
./infer_ppyoloe -model ppyoloe_crn_l_300e_coco -image 000000014439.jpg -device cpu
# Inference with Intel GPU
./infer_ppyoloe -model ppyoloe_crn_l_300e_coco -image 000000014439.jpg -device intel_gpu
```
This document only shows how to compile on Linux/Mac. If you are using Windows, please refer to the following document
- [How to use FastDeploy C++ SDK on Windows](../../../docs/en/faq/use_sdk_on_windows.md)

View File

@@ -0,0 +1,52 @@
[English](README.md) | 中文
# C++ Example
Before deployment, confirm the following two steps
- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the prebuilt deployment library and sample code according to your development environment. Refer to [FastDeploy Prebuilt Libraries](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
**Notice** This document requires FastDeploy >= 1.0.2, or the nightly build version.
```bash
# Get FastDeploy codes
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/tutorials/intel_gpu/cpu
mkdir build && cd build
# Please refer to the preparation step to get the download link
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
# Download PaddleClas model and test image
wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg
tar -xvf ResNet50_vd_infer.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Inference with CPU
./infer_resnet50 -model ResNet50_vd_infer -image ILSVRC2012_val_00000010.jpeg -device cpu -topk 3
# Inference with Intel GPU
./infer_resnet50 -model ResNet50_vd_infer -image ILSVRC2012_val_00000010.jpeg -device intel_gpu -topk 3
# Download PaddleDetection/PP-YOLOE model and test image
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
tar xvf ppyoloe_crn_l_300e_coco.tgz
# Inference with CPU
./infer_ppyoloe -model ppyoloe_crn_l_300e_coco -image 000000014439.jpg -device cpu
# Inference with Intel GPU
./infer_ppyoloe -model ppyoloe_crn_l_300e_coco -image 000000014439.jpg -device intel_gpu
```
This document shows how to compile and run on Linux/Mac. If you are using Windows, please refer to the following document
- [Using the FastDeploy C++ SDK on Windows](../../../docs/cn/faq/use_sdk_on_windows.md)

View File

@@ -0,0 +1,100 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
#include "gflags/gflags.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
DEFINE_string(model, "", "Directory of the inference model");
DEFINE_string(image, "", "Path of the image file.");
DEFINE_string(device, "cpu", "Type of openvino device, 'cpu' or 'intel_gpu'");
void InitAndInfer(const std::string& model_dir, const std::string& image_file, const fastdeploy::RuntimeOption& option) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto config_file = model_dir + sep + "infer_cfg.yml";
auto model = fastdeploy::vision::detection::PPYOLOE(
model_file, params_file, config_file, option);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
std::cout << "Warmup 20 times..." << std::endl;
for (int i = 0; i < 20; ++i) {
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
}
std::cout << "Counting time..." << std::endl;
fastdeploy::TimeCounter tc;
tc.Start();
for (int i = 0; i < 50; ++i) {
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
}
tc.End();
std::cout << "Elapsed time: " << tc.Duration() * 1000 << "ms." << std::endl;
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
cv::Mat vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
fastdeploy::RuntimeOption BuildOption(const std::string& device) {
if (device != "cpu" && device != "intel_gpu") {
std::cerr << "The flag device only can be 'cpu' or 'intel_gpu'" << std::endl;
std::abort();
}
fastdeploy::RuntimeOption option;
option.UseOpenVINOBackend();
if (device == "intel_gpu") {
option.SetOpenVINODevice("HETERO:GPU,CPU");
std::map<std::string, std::vector<int64_t>> shape_info;
shape_info["image"] = {1, 3, 640, 640};
shape_info["scale_factor"] = {1, 2};
option.SetOpenVINOShapeInfo(shape_info);
option.SetOpenVINOCpuOperators({"MulticlassNms"});
}
return option;
}
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
auto option = BuildOption(FLAGS_device);
InitAndInfer(FLAGS_model, FLAGS_image, option);
return 0;
}

View File

@@ -0,0 +1,100 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
#include "gflags/gflags.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
DEFINE_string(model, "", "Directory of the inference model");
DEFINE_string(image, "", "Path of the image file.");
DEFINE_int64(topk, 1, "Topk classify result of the image file");
DEFINE_string(device, "cpu", "Type of openvino device, 'cpu' or 'intel_gpu'");
void InitAndInfer(const std::string& model_dir, const std::string& image_file, int topk, const fastdeploy::RuntimeOption& option) {
auto model_file = model_dir + sep + "inference.pdmodel";
auto params_file = model_dir + sep + "inference.pdiparams";
auto config_file = model_dir + sep + "inference_cls.yaml";
auto model = fastdeploy::vision::classification::PaddleClasModel(
model_file, params_file, config_file, option);
model.GetPostprocessor().SetTopk(topk);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
std::cout << "Warmup 20 times..." << std::endl;
for (int i = 0; i < 20; ++i) {
fastdeploy::vision::ClassifyResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
}
std::cout << "Counting time..." << std::endl;
fastdeploy::TimeCounter tc;
tc.Start();
for (int i = 0; i < 50; ++i) {
fastdeploy::vision::ClassifyResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
}
tc.End();
std::cout << "Elapsed time: " << tc.Duration() * 1000 << "ms." << std::endl;
fastdeploy::vision::ClassifyResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
// print res
std::cout << res.Str() << std::endl;
}
fastdeploy::RuntimeOption BuildOption(const std::string& device) {
if (device != "cpu" && device != "intel_gpu") {
std::cerr << "The flag device only can be 'cpu' or 'intel_gpu'" << std::endl;
std::abort();
}
fastdeploy::RuntimeOption option;
option.UseOpenVINOBackend();
if (device == "intel_gpu") {
option.SetOpenVINODevice("GPU");
std::map<std::string, std::vector<int64_t>> shape_info;
shape_info["inputs"] = {1, 3, 224, 224};
option.SetOpenVINOShapeInfo(shape_info);
}
return option;
}
int main(int argc, char* argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
auto option = BuildOption(FLAGS_device);
InitAndInfer(FLAGS_model, FLAGS_image, FLAGS_topk, option);
return 0;
}

View File

@@ -0,0 +1,38 @@
English | [中文](README_CN.md)
# Python Example
Before deployment, confirm the following two steps
- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Install the FastDeploy Python wheel package. Refer to [Install FastDeploy](../../../docs/en/build_and_install/download_prebuilt_libraries.md)
```bash
# Get FastDeploy codes
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/tutorials/intel_gpu/python
# Download PaddleClas model and test image
wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg
tar -xvf ResNet50_vd_infer.tgz
# Inference with CPU
python infer_resnet50.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device cpu --topk 1
# Inference with Intel GPU
python infer_resnet50.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device intel_gpu --topk 1
# Download PaddleDetection/PP-YOLOE model and test image
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
tar xvf ppyoloe_crn_l_300e_coco.tgz
# Inference with CPU
python infer_ppyoloe.py --model ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device cpu
# Inference with Intel GPU
python infer_ppyoloe.py --model ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device intel_gpu
```

View File

@@ -0,0 +1,37 @@
[English](README.md) | 中文
# Python Example
Before deployment, confirm the following two steps
- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Install the FastDeploy Python wheel package. Refer to [FastDeploy Python Installation](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
```bash
# Get FastDeploy codes
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/tutorials/intel_gpu/python
# Download model and test image
wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
tar -xvf ResNet50_vd_infer.tgz
wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg
# Inference with CPU
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device cpu --topk 1
# Inference with Intel GPU
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device intel_gpu --topk 1
# Download PaddleDetection/PP-YOLOE model and test image
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
tar xvf ppyoloe_crn_l_300e_coco.tgz
# Inference with CPU
python infer_ppyoloe.py --model ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device cpu
# Inference with Intel GPU
python infer_ppyoloe.py --model ppyoloe_crn_l_300e_coco --image 000000014439.jpg --device intel_gpu
```

View File

@@ -0,0 +1,65 @@
import fastdeploy as fd
import cv2
import os
import time
def parse_arguments():
import argparse
import ast
parser = argparse.ArgumentParser()
parser.add_argument(
"--model", required=True, help="Path of PP-YOLOE model.")
parser.add_argument(
"--image", type=str, required=True, help="Path of test image file.")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' or 'intel_gpu'.")
return parser.parse_args()
def build_option(args):
option = fd.RuntimeOption()
option.use_openvino_backend()
assert args.device.lower(
) in ["cpu", "intel_gpu"], "--device only support ['cpu', 'intel_gpu']"
if args.device.lower() == "intel_gpu":
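# Heterogeneous execution: OPs unsupported on the GPU (e.g. MulticlassNms) fall back to the CPU,
# and OpenVINO needs fixed input shapes to run the model on the GPU.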
option.set_openvino_device("HETERO:GPU,CPU")
option.set_openvino_shape_info({
"image": [1, 3, 640, 640],
"scale_factor": [1, 2]
})
option.set_openvino_cpu_operators(["MulticlassNms"])
return option
args = parse_arguments()
runtime_option = build_option(args)
model_file = os.path.join(args.model, "model.pdmodel")
params_file = os.path.join(args.model, "model.pdiparams")
config_file = os.path.join(args.model, "infer_cfg.yml")
model = fd.vision.detection.PPYOLOE(
model_file, params_file, config_file, runtime_option=runtime_option)
im = cv2.imread(args.image)
print("Warmup 20 times...")
for i in range(20):
result = model.predict(im)
print("Counting time...")
start = time.time()
for i in range(50):
result = model.predict(im)
end = time.time()
print("Elapsed time: {}ms".format((end - start) * 1000))
vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")

View File

@@ -0,0 +1,61 @@
import fastdeploy as fd
import cv2
import os
import time
def parse_arguments():
import argparse
import ast
parser = argparse.ArgumentParser()
parser.add_argument(
"--model", required=True, help="Path of PaddleClas model.")
parser.add_argument(
"--image", type=str, required=True, help="Path of test image file.")
parser.add_argument(
"--topk", type=int, default=1, help="Return topk results.")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Type of inference device, support 'cpu' or 'intel_gpu'.")
return parser.parse_args()
def build_option(args):
option = fd.RuntimeOption()
option.use_openvino_backend()
assert args.device.lower(
) in ["cpu", "intel_gpu"], "--device only support ['cpu', 'intel_gpu']"
if args.device.lower() == "intel_gpu":
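# All OPs of this model run on the GPU, so the plain "GPU" device is enough;
# OpenVINO still needs a fixed input shape on the GPU.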
option.set_openvino_device("GPU")
option.set_openvino_shape_info({"inputs": [1, 3, 224, 224]})
return option
args = parse_arguments()
runtime_option = build_option(args)
model_file = os.path.join(args.model, "inference.pdmodel")
params_file = os.path.join(args.model, "inference.pdiparams")
config_file = os.path.join(args.model, "inference_cls.yaml")
model = fd.vision.classification.PaddleClasModel(
model_file, params_file, config_file, runtime_option=runtime_option)
im = cv2.imread(args.image)
print("Warmup 20 times...")
for i in range(20):
result = model.predict(im, args.topk)
print("Counting time...")
start = time.time()
for i in range(50):
result = model.predict(im, args.topk)
end = time.time()
print("Elapsed time: {}ms".format((end - start) * 1000))
print(result)