[Backend] Add KunlunXin XPU deploy support (#747)

* add xpu support

* fix docs

* update code

* update doc

* update code

* update yolov5

* update cmake

* add int64_t data support

* fix

* update download links

* add en doc

* update code

* update xpu options

* update doc

* update doc

* update doc

* update lib links

* update doc

* update code

* update lite xpu link

* update xpu lib

* update doc

* update en doc
Author: yeliang2258
Date: 2022-12-15 21:17:14 +08:00
Committed by: GitHub
Parent: 6e79df40d9
Commit: 5be839b322
39 changed files with 870 additions and 58 deletions


@@ -40,7 +40,7 @@ if(NOT MSVC)
   add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1)
 endif(NOT MSVC)
-if(UNIX AND (NOT APPLE) AND (NOT ANDROID) AND (NOT ENABLE_TIMVX))
+if(UNIX AND (NOT APPLE) AND (NOT ANDROID) AND (NOT WITH_TIMVX))
   include(${PROJECT_SOURCE_DIR}/cmake/patchelf.cmake)
 endif()
@@ -64,7 +64,8 @@ option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
 option(ENABLE_VISION "Whether to enable vision models usage." OFF)
 option(ENABLE_TEXT "Whether to enable text models usage." OFF)
 option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF)
-option(ENABLE_TIMVX "Whether to compile for TIMVX deploy." OFF)
+option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF)
+option(WITH_XPU "Whether to compile for KunlunXin XPU deploy." OFF)
 option(WITH_TESTING "Whether to compile with unittest." OFF)
 ############################# Options for Android cross compiling #########################
 option(WITH_OPENCV_STATIC "Use OpenCV static lib for Android." OFF)
@@ -138,10 +139,23 @@ set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}")
 include_directories(${HEAD_DIR})
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
-if (ENABLE_TIMVX)
+if (WITH_TIMVX)
   include(${PROJECT_SOURCE_DIR}/cmake/timvx.cmake)
 endif()
+if (WITH_XPU)
+  if(NOT ENABLE_LITE_BACKEND)
+    message(WARNING "While compiling with -DWITH_XPU=ON, will force to set -DENABLE_LITE_BACKEND=ON")
+    set(ENABLE_LITE_BACKEND ON)
+  endif()
+  if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
+    message(FATAL_ERROR "XPU is only supported on Linux x64 platform")
+  endif()
+  if(NOT PADDLELITE_URL)
+    set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/third_libs/lite-linux-x64-xpu-20221215.tgz")
+  endif()
+endif()
 if(ANDROID OR IOS)
   if(ENABLE_ORT_BACKEND)


@@ -27,6 +27,7 @@ set(OPENCV_DIRECTORY "@OPENCV_DIRECTORY@")
 set(ORT_DIRECTORY "@ORT_DIRECTORY@")
 set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
 set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
+set(WITH_XPU @WITH_XPU@)
 set(FASTDEPLOY_LIBS "")
 set(FASTDEPLOY_INCS "")
@@ -237,6 +238,10 @@ if(ENABLE_PADDLE_FRONTEND)
   list(APPEND FASTDEPLOY_LIBS ${PADDLE2ONNX_LIB})
 endif()
+if(WITH_XPU)
+  list(APPEND FASTDEPLOY_LIBS -lpthread -lrt -ldl)
+endif()
 remove_duplicate_libraries(FASTDEPLOY_LIBS)
 # Print compiler information


@@ -37,6 +37,8 @@ function(fastdeploy_summary)
   message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
   message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
   message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
+  message(STATUS " WITH_TIMVX : ${WITH_TIMVX}")
+  message(STATUS " WITH_XPU : ${WITH_XPU}")
   if(ENABLE_ORT_BACKEND)
     message(STATUS " ONNXRuntime version : ${ONNXRUNTIME_VERSION}")
   endif()

docs/README_CN.md (Normal file → Executable file)

@@ -8,6 +8,7 @@
 - [Build and install on GPU platform](cn/build_and_install/gpu.md)
 - [Build and install on CPU platform](cn/build_and_install/cpu.md)
 - [Build and install on IPU platform](cn/build_and_install/ipu.md)
+- [Build and install on KunlunXin XPU platform](cn/build_and_install/xpu.md)
 - [Build and install on Jetson platform](cn/build_and_install/jetson.md)
 - [Build and install on Android platform](cn/build_and_install/android.md)
 - [Build and install the serving deployment image](../serving/docs/zh_CN/compile.md)

docs/README_EN.md (Normal file → Executable file)

@@ -8,6 +8,7 @@
 - [Build and Install FastDeploy Library on GPU Platform](en/build_and_install/gpu.md)
 - [Build and Install FastDeploy Library on CPU Platform](en/build_and_install/cpu.md)
 - [Build and Install FastDeploy Library on IPU Platform](en/build_and_install/ipu.md)
+- [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/xpu.md)
 - [Build and Install FastDeploy Library on Nvidia Jetson Platform](en/build_and_install/jetson.md)
 - [Build and Install FastDeploy Library on Android Platform](en/build_and_install/android.md)
 - [Build and Install FastDeploy Serving Deployment Image](../serving/docs/EN/compile-en.md)
@@ -19,10 +20,10 @@
 - [A Quick Start on Runtime Python](en/quick_start/runtime/python.md)
 - [A Quick Start on Runtime C++](en/quick_start/runtime/cpp.md)
 ## API
 - [Python API](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
 - [C++ API](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
 - [Android Java API](../java/android)
 ## Performance Optimization


@@ -13,6 +13,7 @@
 - [Android platform deployment environment](android.md)
 - [Rockchip RV1126 deployment environment](rv1126.md)
 - [Amlogic A311D deployment environment](a311d.md)
+- [KunlunXin XPU deployment environment](xpu.md)
 ## FastDeploy build options
@@ -20,10 +21,11 @@
 | Option | Description |
 |:------------------------|:--------------------------------------------------------------------------|
 | ENABLE_ORT_BACKEND | Default OFF; whether to build the ONNX Runtime backend (recommended ON for CPU/GPU) |
 | ENABLE_PADDLE_BACKEND | Default OFF; whether to build the Paddle Inference backend (recommended ON for CPU/GPU) |
 | ENABLE_LITE_BACKEND | Default OFF; whether to build the Paddle Lite backend (must be ON when building the Android library) |
 | ENABLE_RKNPU2_BACKEND | Default OFF; whether to build the RKNPU2 backend (recommended ON for RK3588/RK3568/RK3566) |
-| WITH_TIMVX | Default OFF; set to ON when deploying on RV1126/RV1109/A311D |
+| WITH_XPU | Default OFF; set to ON when deploying on KunlunXin XPU |
+| WITH_TIMVX | Default OFF; set to ON when deploying on RV1126/RV1109/A311D |
 | ENABLE_TRT_BACKEND | Default OFF; whether to build the TensorRT backend (recommended ON for GPU) |
 | ENABLE_OPENVINO_BACKEND | Default OFF; whether to build the OpenVINO backend (recommended ON for CPU) |
 | ENABLE_VISION | Default OFF; whether to build the vision model deployment module |


@@ -9,7 +9,8 @@ FastDeploy supports deployment and inference on the Amlogic NPU based on the Paddle-Lite backend.
 | Option | Default | Description | Remarks |
 |:---|:---|:---|:---|
 | ENABLE_LITE_BACKEND | OFF | Must be set to ON when building the A311D library | - |
 | WITH_TIMVX | OFF | Must be set to ON when building the A311D library | - |
+| TARGET_ABI | NONE | Must be set to arm64 when building the A311D library | - |
 For more build options, see [FastDeploy build options](./README.md)


@@ -8,8 +8,9 @@ FastDeploy supports deployment and inference on Rockchip SoC based on the Paddle-Lite backend.
 The relevant build options are described as follows:
 | Option | Default | Description | Remarks |
 |:---|:---|:---|:---|
 | ENABLE_LITE_BACKEND | OFF | Must be set to ON when building the RK library | - |
 | WITH_TIMVX | OFF | Must be set to ON when building the RK library | - |
+| TARGET_ABI | NONE | Must be set to armhf when building the RK library | - |
 For more build options, see [FastDeploy build options](./README.md)
@@ -86,7 +87,7 @@ dmesg | grep Galcore
 wget https://paddlelite-demo.bj.bcebos.com/devices/generic/PaddleLite-generic-demo.tar.gz
 tar -xf PaddleLite-generic-demo.tar.gz
 ```
 2. Use `uname -a` to check the `Linux Kernel` version and confirm that it is 4.19.111.
 3. Upload the `galcore.ko` under `PaddleLite-generic-demo/libs/PaddleLite/linux/armhf/lib/verisilicon_timvx/viv_sdk_6_4_6_5/lib/1126/4.19.111/` to the development board.
 4. Log in to the development board, run `sudo rmmod galcore` to unload the original driver and `sudo insmod galcore.ko` to load the uploaded driver. (Whether sudo is needed depends on the board; for some adb-connected devices, run adb root first. If this step fails, jump to method 2.)


@@ -0,0 +1,75 @@
# Build and Install on KunlunXin XPU
FastDeploy supports deployment and inference on KunlunXin XPU based on the Paddle-Lite backend.
For more details, see the [PaddleLite deployment example](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/kunlunxin_xpu.html#xpu).
This document describes how to build the PaddleLite-based C++ FastDeploy library.
The relevant build options are described as follows:
| Option | Default | Description | Remarks |
|:---|:---|:---|:---|
| WITH_XPU | OFF | Must be set to ON when deploying on XPU | - |
| ENABLE_ORT_BACKEND | OFF | Whether to build the ONNX Runtime backend | - |
| ENABLE_PADDLE_BACKEND | OFF | Whether to build the Paddle Inference backend | - |
| ENABLE_OPENVINO_BACKEND | OFF | Whether to build the OpenVINO backend | - |
| ENABLE_VISION | OFF | Whether to build the vision model deployment module | - |
| ENABLE_TEXT | OFF | Whether to build the text (NLP) model deployment module | - |
Third-party dependency paths (if the options below are not set, prebuilt libraries are downloaded automatically):
| Option | Description |
| :---------------------- | :--------------------------------------------------------------------------------------------- |
| ORT_DIRECTORY | When the ONNX Runtime backend is enabled, specifies a local ONNX Runtime library path; if not set, it is downloaded automatically during the build |
| OPENCV_DIRECTORY | When ENABLE_VISION=ON, specifies a local OpenCV library path; if not set, OpenCV is downloaded automatically during the build |
| OPENVINO_DIRECTORY | When the OpenVINO backend is enabled, specifies a local OpenVINO library path; if not set, OpenVINO is downloaded automatically during the build |
For more build options, see [FastDeploy build options](./README.md)
## Building the PaddleLite-based C++ FastDeploy library
- OS: Linux
- gcc/g++: version >= 8.2
- cmake: version >= 3.15
It is recommended to install OpenCV yourself and point the build at it with `-DOPENCV_DIRECTORY`. If `-DOPENCV_DIRECTORY` is not set, the prebuilt OpenCV provided by FastDeploy is downloaded automatically, but on **Linux** it cannot read video files or provide visualization functions such as imshow.
```
sudo apt-get install libopencv-dev
```
The build commands are as follows (GPU support is turned off; the ORT backend, the Paddle backend, and the vision module are optional):
```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
# CMake configuration with KunlunXin xpu toolchain
cmake -DWITH_XPU=ON \
      -DWITH_GPU=OFF \
      -DENABLE_ORT_BACKEND=ON \
      -DENABLE_PADDLE_BACKEND=ON \
      -DCMAKE_INSTALL_PREFIX=fastdeploy-xpu \
      -DENABLE_VISION=ON \
      -DOPENCV_DIRECTORY=/usr/lib/x86_64-linux-gnu/cmake/opencv4 \
      ..
# Build FastDeploy KunlunXin XPU C++ SDK
make -j8
make install
```
When the build completes, the fastdeploy-xpu directory is generated, which contains the PaddleLite-based FastDeploy library.
## Python build
The build commands are as follows:
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
export WITH_XPU=ON
export WITH_GPU=OFF
export ENABLE_ORT_BACKEND=ON
export ENABLE_PADDLE_BACKEND=ON
export ENABLE_VISION=ON
# OPENCV_DIRECTORY is optional; if not set, the prebuilt OpenCV provided by FastDeploy is downloaded automatically
export OPENCV_DIRECTORY=/usr/lib/x86_64-linux-gnu/cmake/opencv4
python setup.py build
python setup.py bdist_wheel
```
When the build completes, the `wheel` package is generated in the `FastDeploy/python/dist` directory; install it directly with pip.
If you change any build options, delete the `build` and `.setuptools-cmake-build` subdirectories under `FastDeploy/python` before rebuilding to avoid stale cache effects.

docs/en/build_and_install/README.md (Normal file → Executable file)

@@ -12,6 +12,9 @@ English | [中文](../../cn/build_and_install/README.md)
 - [Build and Install on IPU Platform](ipu.md)
 - [Build and Install on Nvidia Jetson Platform](jetson.md)
 - [Build and Install on Android Platform](android.md)
+- [Build and Install on RV1126 Platform](rv1126.md)
+- [Build and Install on A311D Platform](a311d.md)
+- [Build and Install on KunlunXin XPU Platform](xpu.md)
 ## Build options
@@ -25,6 +28,8 @@ English | [中文](../../cn/build_and_install/README.md)
 | ENABLE_VISION | Default OFF, whether to enable vision models deployment module |
 | ENABLE_TEXT | Default OFF, whether to enable text models deployment module |
 | WITH_GPU | Default OFF, if build on GPU, this need to be ON |
+| WITH_XPU | Default OFF, if deploy on KunlunXin XPU, this need to be ON |
+| WITH_TIMVX | Default OFF, if deploy on RV1126/RV1109/A311D, this need to be ON |
 | CUDA_DIRECTORY | Default /usr/local/cuda, if build on GPU, this defines the path of CUDA(>=11.2) |
 | TRT_DIRECTORY | If build with ENABLE_TRT_BACKEND=ON, this defines the path of TensorRT(>=8.4) |
 | ORT_DIRECTORY | [Optional] If build with ENABLE_ORT_BACKEND=ON, this flag defines the path of ONNX Runtime, but if this flag is not set, it will download ONNX Runtime library automatically |


@@ -0,0 +1,105 @@
# How to Build A311D Deployment Environment
FastDeploy supports AI deployment on the Amlogic A311D SoC based on the Paddle-Lite backend. For more detailed information, please refer to: [PaddleLite Deployment Example](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/verisilicon_timvx.html).
This document describes how to compile the PaddleLite-based C++ FastDeploy cross-compilation library.
The relevant compilation options are described as follows:
|Compile Options|Default Values|Description|Remarks|
|:---|:---|:---|:---|
|ENABLE_LITE_BACKEND|OFF|It needs to be set to ON when compiling the A311D library| - |
|WITH_TIMVX|OFF|It needs to be set to ON when compiling the A311D library| - |
|TARGET_ABI|NONE|It needs to be set to arm64 when compiling the A311D library| - |
For more compilation options, please refer to [Description of FastDeploy compilation options](./README.md)
## Cross-compilation environment construction
### Host Environment Requirements
- OS: Ubuntu == 16.04
- cmake version >= 3.10.0
### Building the compilation environment
You can enter the FastDeploy/tools/timvx directory and use the following command to install:
```bash
cd FastDeploy/tools/timvx
bash install.sh
```
You can also install it with the following commands:
```bash
# 1. Install basic software
apt update
apt-get install -y --no-install-recommends \
gcc g++ git make wget python unzip
# 2. Install arm gcc toolchains
apt-get install -y --no-install-recommends \
g++-arm-linux-gnueabi gcc-arm-linux-gnueabi \
g++-arm-linux-gnueabihf gcc-arm-linux-gnueabihf \
gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
# 3. Install cmake 3.10 or above
wget -c https://mms-res.cdn.bcebos.com/cmake-3.10.3-Linux-x86_64.tar.gz && \
tar xzf cmake-3.10.3-Linux-x86_64.tar.gz && \
mv cmake-3.10.3-Linux-x86_64 /opt/cmake-3.10 && \
ln -s /opt/cmake-3.10/bin/cmake /usr/bin/cmake && \
ln -s /opt/cmake-3.10/bin/ccmake /usr/bin/ccmake
```
## FastDeploy cross-compilation library compilation based on PaddleLite
After setting up the cross-compilation environment, the compilation command is as follows:
```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
# CMake configuration with A311D toolchain
cmake -DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
-DWITH_TIMVX=ON \
-DTARGET_ABI=arm64 \
-DCMAKE_INSTALL_PREFIX=fastdeploy-tmivx \
-DENABLE_VISION=ON \
-Wno-dev ..
# Build FastDeploy A311D C++ SDK
make -j8
make install
```
After the compilation is complete, the fastdeploy-tmivx directory will be generated, indicating that the FastDeploy library based on PaddleLite TIM-VX has been compiled.
## Prepare the Soc environment
Before deployment, make sure the version of the Verisilicon Linux kernel NPU driver galcore.so meets the requirements. Log in to the development board and run the following command to query the NPU driver version; the recommended driver version is 6.4.4.3:
```bash
dmesg | grep Galcore
```
If the current version does not comply with the above, please read the following content carefully to ensure that the underlying NPU driver environment is correct.
There are two ways to modify the current NPU driver version:
1. Manually replace the NPU driver (recommended).
2. Re-flash the board with firmware that provides the required NPU driver version.
### Manually replace the NPU driver version
1. Use the following command to download and decompress the PaddleLite demo, which provides ready-made driver files
```bash
wget https://paddlelite-demo.bj.bcebos.com/devices/generic/PaddleLite-generic-demo.tar.gz
tar -xf PaddleLite-generic-demo.tar.gz
```
2. Use `uname -a` to check the `Linux Kernel` version and confirm that it is 4.19.111.
3. Upload the `galcore.ko` under the `PaddleLite-generic-demo/libs/PaddleLite/linux/arm64/lib/verisilicon_timvx/viv_sdk_6_4_4_3/lib/a311d/4.9.113` path to the development board.
4. Log in to the development board, run `sudo rmmod galcore` to unload the original driver and `sudo insmod galcore.ko` to load the uploaded driver. (Whether sudo is needed depends on the board; for some adb-connected devices, run adb root first.) If this step fails, go to method 2.
5. Run `dmesg | grep Galcore` on the development board to query the NPU driver version and confirm that it is 6.4.4.3.
### Re-flashing the firmware
For your specific board model, ask the board vendor or the official customer service for firmware that provides NPU driver version 6.4.4.3 and for the flashing procedure.
For more details, please refer to: [PaddleLite prepares the device environment](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/verisilicon_timvx.html#zhunbeishebeihuanjing)
## Deployment example based on FastDeploy on A311D
1. For deploying the PaddleClas classification model on A311D, please refer to: [C++ deployment example of PaddleClas classification model on A311D](../../../examples/vision/classification/paddleclas/a311d/README.md)
2. For deploying PPYOLOE detection model on A311D, please refer to: [C++ deployment example of PPYOLOE detection model on A311D](../../../examples/vision/detection/paddledetection/a311d/README.md)
3. For deploying YOLOv5 detection model on A311D, please refer to: [C++ Deployment Example of YOLOv5 Detection Model on A311D](../../../examples/vision/detection/yolov5/a311d/README.md)
4. For deploying PP-LiteSeg segmentation model on A311D, please refer to: [C++ Deployment Example of PP-LiteSeg Segmentation Model on A311D](../../../examples/vision/segmentation/paddleseg/a311d/README.md)


@@ -0,0 +1,105 @@
# How to Build RV1126 Deployment Environment
FastDeploy supports AI deployment on Rockchip SoC based on the Paddle-Lite backend. For more detailed information, please refer to: [PaddleLite Deployment Example](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/verisilicon_timvx.html).
This document describes how to compile the PaddleLite-based C++ FastDeploy cross-compilation library.
The relevant compilation options are described as follows:
|Compile Options|Default Values|Description|Remarks|
|:---|:---|:---|:---|
|ENABLE_LITE_BACKEND|OFF|It needs to be set to ON when compiling the RK library| - |
|WITH_TIMVX|OFF|It needs to be set to ON when compiling the RK library| - |
|TARGET_ABI|NONE|It needs to be set to armhf when compiling the RK library| - |
For more compilation options, please refer to [Description of FastDeploy compilation options](./README.md)
## Cross-compilation environment construction
### Host Environment Requirements
- OS: Ubuntu == 16.04
- cmake version >= 3.10.0
### Building the compilation environment
You can enter the FastDeploy/tools/timvx directory and use the following command to install:
```bash
cd FastDeploy/tools/timvx
bash install.sh
```
You can also install it with the following commands:
```bash
# 1. Install basic software
apt update
apt-get install -y --no-install-recommends \
gcc g++ git make wget python unzip
# 2. Install arm gcc toolchains
apt-get install -y --no-install-recommends \
g++-arm-linux-gnueabi gcc-arm-linux-gnueabi \
g++-arm-linux-gnueabihf gcc-arm-linux-gnueabihf \
gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
# 3. Install cmake 3.10 or above
wget -c https://mms-res.cdn.bcebos.com/cmake-3.10.3-Linux-x86_64.tar.gz && \
tar xzf cmake-3.10.3-Linux-x86_64.tar.gz && \
mv cmake-3.10.3-Linux-x86_64 /opt/cmake-3.10 && \
ln -s /opt/cmake-3.10/bin/cmake /usr/bin/cmake && \
ln -s /opt/cmake-3.10/bin/ccmake /usr/bin/ccmake
```
## FastDeploy cross-compilation library compilation based on PaddleLite
After setting up the cross-compilation environment, the compilation command is as follows:
```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
# CMake configuration with RK toolchain
cmake -DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
-DWITH_TIMVX=ON \
-DTARGET_ABI=armhf \
-DCMAKE_INSTALL_PREFIX=fastdeploy-tmivx \
-DENABLE_VISION=ON \
-Wno-dev ..
# Build FastDeploy RV1126 C++ SDK
make -j8
make install
```
After the compilation is complete, the fastdeploy-tmivx directory will be generated, indicating that the FastDeploy library based on PaddleLite TIM-VX has been compiled.
## Prepare the Soc environment
Before deployment, make sure the version of the Verisilicon Linux kernel NPU driver galcore.so meets the requirements. Log in to the development board and run the following command to query the NPU driver version; the recommended Rockchip driver version is 6.4.6.5:
```bash
dmesg | grep Galcore
```
If the current version does not comply with the above, please read the following content carefully to ensure that the underlying NPU driver environment is correct.
There are two ways to modify the current NPU driver version:
1. Manually replace the NPU driver (recommended).
2. Re-flash the board with firmware that provides the required NPU driver version.
### Manually replace the NPU driver version
1. Use the following command to download and decompress the PaddleLite demo, which provides ready-made driver files
```bash
wget https://paddlelite-demo.bj.bcebos.com/devices/generic/PaddleLite-generic-demo.tar.gz
tar -xf PaddleLite-generic-demo.tar.gz
```
2. Use `uname -a` to check the `Linux Kernel` version and confirm that it is 4.19.111.
3. Upload the `galcore.ko` under the `PaddleLite-generic-demo/libs/PaddleLite/linux/armhf/lib/verisilicon_timvx/viv_sdk_6_4_6_5/lib/1126/4.19.111/` path to the development board.
4. Log in to the development board, run `sudo rmmod galcore` to unload the original driver and `sudo insmod galcore.ko` to load the uploaded driver. (Whether sudo is needed depends on the board; for some adb-connected devices, run adb root first.) If this step fails, go to method 2.
5. Run `dmesg | grep Galcore` on the development board to query the NPU driver version and confirm that it is 6.4.6.5.
### Re-flashing the firmware
For your specific board model, ask the board vendor or the official customer service for firmware that provides NPU driver version 6.4.6.5 and for the flashing procedure.
For more details, please refer to: [PaddleLite prepares the device environment](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/verisilicon_timvx.html#zhunbeishebeihuanjing)
## Deployment example based on FastDeploy on RV1126
1. For deploying the PaddleClas classification model on RV1126, please refer to: [C++ deployment example of PaddleClas classification model on RV1126](../../../examples/vision/classification/paddleclas/rv1126/README.md)
2. For deploying PPYOLOE detection model on RV1126, please refer to: [C++ deployment example of PPYOLOE detection model on RV1126](../../../examples/vision/detection/paddledetection/rv1126/README.md)
3. For deploying YOLOv5 detection model on RV1126, please refer to: [C++ Deployment Example of YOLOv5 Detection Model on RV1126](../../../examples/vision/detection/yolov5/rv1126/README.md)
4. For deploying PP-LiteSeg segmentation model on RV1126, please refer to: [C++ Deployment Example of PP-LiteSeg Segmentation Model on RV1126](../../../examples/vision/segmentation/paddleseg/rv1126/README.md)


@@ -0,0 +1,78 @@
# How to Build KunlunXin XPU Deployment Environment
FastDeploy supports AI deployment on KunlunXin XPU based on the Paddle-Lite backend. For more detailed information, please refer to: [PaddleLite Deployment Example](https://www.paddlepaddle.org.cn/lite/develop/demo_guides/kunlunxin_xpu.html#xpu).
This document describes how to compile the C++ FastDeploy library based on PaddleLite.
The relevant compilation options are described as follows:
|Compile Options|Default Values|Description|Remarks|
|:---|:---|:---|:---|
| ENABLE_LITE_BACKEND | OFF | It needs to be set to ON when compiling the KunlunXin XPU library (forced to ON when WITH_XPU=ON) | - |
| WITH_XPU | OFF | It needs to be set to ON when compiling the KunlunXin XPU library| - |
| ENABLE_ORT_BACKEND | OFF | Whether to integrate the ONNX Runtime backend | - |
| ENABLE_PADDLE_BACKEND | OFF | Whether to integrate the Paddle Inference backend | - |
| ENABLE_OPENVINO_BACKEND | OFF | Whether to integrate the OpenVINO backend | - |
| ENABLE_VISION | OFF | Whether to integrate the vision models | - |
| ENABLE_TEXT | OFF | Whether to integrate the text models | - |
The configuration of third-party libraries (optional; if the options below are not defined, prebuilt third-party libraries are downloaded automatically while building FastDeploy):
| Option | Description |
| :---------------------- | :--------------------------------------------------------------------------------------------- |
| ORT_DIRECTORY | When ENABLE_ORT_BACKEND=ON, use ORT_DIRECTORY to specify your own ONNX Runtime library path. |
| OPENCV_DIRECTORY | When ENABLE_VISION=ON, use OPENCV_DIRECTORY to specify your own OpenCV library path. |
| OPENVINO_DIRECTORY | When ENABLE_OPENVINO_BACKEND=ON, use OPENVINO_DIRECTORY to specify your own OpenVINO library path. |
For more compilation options, please refer to [Description of FastDeploy compilation options](./README.md)
## C++ FastDeploy library compilation based on PaddleLite
- OS: Linux
- gcc/g++: version >= 8.2
- cmake: version >= 3.15
It is recommended to install the OpenCV library manually and define `-DOPENCV_DIRECTORY` to point at it (if the flag is not defined, a prebuilt OpenCV library is downloaded automatically while building FastDeploy, but that prebuilt OpenCV cannot read video files or provide functions such as `imshow`).
```
sudo apt-get install libopencv-dev
```
The compilation command is as follows:
```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
# CMake configuration with KunlunXin xpu toolchain
cmake -DWITH_XPU=ON \
-DWITH_GPU=OFF \
-DENABLE_ORT_BACKEND=ON \
-DENABLE_PADDLE_BACKEND=ON \
-DCMAKE_INSTALL_PREFIX=fastdeploy-xpu \
-DENABLE_VISION=ON \
-DOPENCV_DIRECTORY=/usr/lib/x86_64-linux-gnu/cmake/opencv4 \
..
# Build FastDeploy KunlunXin XPU C++ SDK
make -j8
make install
```
After the compilation is complete, the fastdeploy-xpu directory will be generated, indicating that the PaddleLite-based FastDeploy library has been compiled.
## Python compilation
The compilation command is as follows:
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
export WITH_XPU=ON
export WITH_GPU=OFF
export ENABLE_ORT_BACKEND=ON
export ENABLE_PADDLE_BACKEND=ON
export ENABLE_VISION=ON
# The OPENCV_DIRECTORY is optional, if not exported, a prebuilt OpenCV library will be downloaded
export OPENCV_DIRECTORY=/usr/lib/x86_64-linux-gnu/cmake/opencv4
python setup.py build
python setup.py bdist_wheel
```
After the compilation is completed, the compiled `wheel` package is generated in the `FastDeploy/python/dist` directory; install it directly with pip.
If you modify the compilation parameters, delete the `build` and `.setuptools-cmake-build` subdirectories under `FastDeploy/python` before recompiling to avoid stale cache effects.


@@ -30,6 +30,10 @@ wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/Ima
 ./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 1
 # TensorRT inference on GPU
 ./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 2
+# IPU inference
+./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 3
+# KunlunXin XPU inference
+./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 4
 ```
 The commands above only work on Linux or MacOS. For how to use the SDK on Windows, please refer to:

examples/vision/classification/paddleclas/cpp/infer.cc (Normal file → Executable file)

@@ -96,6 +96,32 @@ void IpuInfer(const std::string& model_dir, const std::string& image_file) {
   std::cout << res.Str() << std::endl;
 }
+void XpuInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "inference.pdmodel";
+  auto params_file = model_dir + sep + "inference.pdiparams";
+  auto config_file = model_dir + sep + "inference_cls.yaml";
+  auto option = fastdeploy::RuntimeOption();
+  option.UseXpu();
+  auto model = fastdeploy::vision::classification::PaddleClasModel(
+      model_file, params_file, config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+  auto im = cv::imread(image_file);
+  fastdeploy::vision::ClassifyResult res;
+  if (!model.Predict(im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+  // print res
+  std::cout << res.Str() << std::endl;
+}
 void TrtInfer(const std::string& model_dir, const std::string& image_file) {
   auto model_file = model_dir + sep + "inference.pdmodel";
   auto params_file = model_dir + sep + "inference.pdiparams";
@@ -128,7 +154,7 @@ int main(int argc, char* argv[]) {
         "e.g ./infer_demo ./ResNet50_vd ./test.jpeg 0"
         << std::endl;
     std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
-                 "with gpu; 2: run with gpu and use tensorrt backend."
+                 "with gpu; 2: run with gpu and use tensorrt backend; 3: run with ipu; 4: run with xpu."
              << std::endl;
     return -1;
   }
@@ -141,6 +167,8 @@
     TrtInfer(argv[1], argv[2]);
   } else if (std::atoi(argv[3]) == 3) {
     IpuInfer(argv[1], argv[2]);
+  } else if (std::atoi(argv[3]) == 4) {
+    XpuInfer(argv[1], argv[2]);
   }
   return 0;
 }


@@ -25,6 +25,8 @@ python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg -
 python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --use_trt True --topk 1
 # IPU inference (note: the first IPU run serializes the model, which takes a while; please be patient)
 python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device ipu --topk 1
+# XPU inference
+python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device xpu --topk 1
 ```
 The result returned after running is as follows


@@ -35,6 +35,9 @@ def build_option(args):
     if args.device.lower() == "ipu":
         option.use_ipu()
+    if args.device.lower() == "xpu":
+        option.use_xpu()
     if args.use_trt:
         option.use_trt_backend()
     return option

examples/vision/detection/yolov5/cpp/CMakeLists.txt (Normal file → Executable file)

@@ -12,3 +12,7 @@ include_directories(${FASTDEPLOY_INCS})
 add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
 # Link the FastDeploy library
 target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+add_executable(infer_paddle_demo ${PROJECT_SOURCE_DIR}/infer_paddle_model.cc)
+# Link the FastDeploy library
+target_link_libraries(infer_paddle_demo ${FASTDEPLOY_LIBS})

examples/vision/detection/yolov5/cpp/README.md (Normal file → Executable file)

@@ -12,16 +12,33 @@
 ```bash
 mkdir build
 cd build
 # Download the FastDeploy prebuilt library; users can choose a suitable version from the `FastDeploy prebuilt libraries` mentioned above
 wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
 tar xvf fastdeploy-linux-x64-x.x.x.tgz
 cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
 make -j
-# Download the officially converted yolov5 model file and test image
-wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx
+# Download the officially converted yolov5 Paddle model file and test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s_infer.tar
+tar -xvf yolov5s_infer.tar
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+# CPU inference
+./infer_paddle_demo yolov5s_infer 000000014439.jpg 0
+# GPU inference
+./infer_paddle_demo yolov5s_infer 000000014439.jpg 1
+# TensorRT inference on GPU
+./infer_paddle_demo yolov5s_infer 000000014439.jpg 2
+# KunlunXin XPU inference
+./infer_paddle_demo yolov5s_infer 000000014439.jpg 3
+```
+The commands above run the Paddle model. To run the ONNX model instead, follow these steps:
+```bash
+# 1. Download the officially converted yolov5 ONNX model file and test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
 # CPU inference
 ./infer_demo yolov5s.onnx 000000014439.jpg 0
 # GPU inference
@@ -29,7 +46,6 @@ wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/0000000
 # TensorRT inference on GPU
 ./infer_demo yolov5s.onnx 000000014439.jpg 2
 ```
 The visualized result after running is shown below
 <img width="640" src="https://user-images.githubusercontent.com/67993288/184309358-d803347a-8981-44b6-b589-4608021ad0f4.jpg">

examples/vision/detection/yolov5/cpp/infer.cc (Normal file → Executable file)

@@ -102,4 +102,4 @@
     TrtInfer(argv[1], argv[2]);
   }
   return 0;
 }


@@ -0,0 +1,154 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
void CpuInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
fastdeploy::RuntimeOption option;
option.UseCpu();
auto model = fastdeploy::vision::detection::YOLOv5(
model_file, params_file, option, fastdeploy::ModelFormat::PADDLE);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void GpuInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto option = fastdeploy::RuntimeOption();
option.UseGpu();
auto model = fastdeploy::vision::detection::YOLOv5(
model_file, params_file, option, fastdeploy::ModelFormat::PADDLE);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void TrtInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
auto option = fastdeploy::RuntimeOption();
option.UseGpu();
option.UseTrtBackend();
option.SetTrtInputShape("images", {1, 3, 640, 640});
auto model = fastdeploy::vision::detection::YOLOv5(
model_file, params_file, option, fastdeploy::ModelFormat::PADDLE);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::Visualize::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
void XpuInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "model.pdmodel";
auto params_file = model_dir + sep + "model.pdiparams";
fastdeploy::RuntimeOption option;
option.UseXpu();
auto model = fastdeploy::vision::detection::YOLOv5(
model_file, params_file, option, fastdeploy::ModelFormat::PADDLE);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto im = cv::imread(image_file);
fastdeploy::vision::DetectionResult res;
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisDetection(im, res);
cv::imwrite("vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
}
int main(int argc, char* argv[]) {
if (argc < 4) {
std::cout << "Usage: infer_demo path/to/model path/to/image run_option, "
"e.g ./infer_model ./yolov5s_infer ./test.jpeg 0"
<< std::endl;
std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
"with gpu; 2: run with gpu and use tensorrt backend; 3: run with KunlunXin XPU."
<< std::endl;
return -1;
}
if (std::atoi(argv[3]) == 0) {
CpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 1) {
GpuInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 2) {
TrtInfer(argv[1], argv[2]);
} else if (std::atoi(argv[3]) == 3) {
XpuInfer(argv[1], argv[2]);
}
return 0;
}

examples/vision/detection/yolov5/python/README.md (Normal file → Executable file)

@@ -13,15 +13,18 @@ git clone https://github.com/PaddlePaddle/FastDeploy.git
 cd examples/vision/detection/yolov5/python/
 # Download the yolov5 model file and test image
-wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s.onnx
+wget https://bj.bcebos.com/paddlehub/fastdeploy/yolov5s_infer.tar
+tar -xf yolov5s_infer.tar
 wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
 # CPU inference
-python infer.py --model yolov5s.onnx --image 000000014439.jpg --device cpu
+python infer.py --model yolov5s_infer --image 000000014439.jpg --device cpu
 # GPU inference
-python infer.py --model yolov5s.onnx --image 000000014439.jpg --device gpu
+python infer.py --model yolov5s_infer --image 000000014439.jpg --device gpu
 # TensorRT inference on GPU
-python infer.py --model yolov5s.onnx --image 000000014439.jpg --device gpu --use_trt True
+python infer.py --model yolov5s_infer --image 000000014439.jpg --device gpu --use_trt True
+# XPU inference
+python infer.py --model yolov5s_infer --image 000000014439.jpg --device xpu
 ```
 The visualized result after running is shown below

examples/vision/detection/yolov5/python/infer.py (Normal file → Executable file)

@@ -1,20 +1,20 @@
 import fastdeploy as fd
 import cv2
+import os
 def parse_arguments():
     import argparse
     import ast
     parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--model", default=None, help="Path of yolov5 onnx model.")
+    parser.add_argument("--model", default=None, help="Path of yolov5 model.")
     parser.add_argument(
         "--image", default=None, help="Path of test image file.")
     parser.add_argument(
         "--device",
         type=str,
         default='cpu',
-        help="Type of inference device, support 'cpu' or 'gpu'.")
+        help="Type of inference device, support 'cpu' or 'gpu' or 'xpu'.")
     parser.add_argument(
         "--use_trt",
         type=ast.literal_eval,
@@ -25,6 +25,8 @@
 def build_option(args):
     option = fd.RuntimeOption()
+    if args.device.lower() == "xpu":
+        option.use_xpu()
     if args.device.lower() == "gpu":
         option.use_gpu()
@@ -37,14 +39,15 @@
 args = parse_arguments()
-if args.model is None:
-    model = fd.download_model(name='YOLOv5s')
-else:
-    model = args.model
 # Configure the runtime and load the model
 runtime_option = build_option(args)
-model = fd.vision.detection.YOLOv5(model, runtime_option=runtime_option)
+model_file = os.path.join(args.model, "model.pdmodel")
+params_file = os.path.join(args.model, "model.pdiparams")
+model = fd.vision.detection.YOLOv5(
+    model_file,
+    params_file,
+    runtime_option=runtime_option,
+    model_format=fd.ModelFormat.PADDLE)
 # Predict the detection result for an image
 if args.image is None:


@@ -43,24 +43,33 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   option_ = option;
   std::vector<paddle::lite_api::Place> valid_places;
   if (option_.enable_int8) {
-    valid_places.push_back(
-        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
+    if(option_.enable_xpu) {
+      valid_places.push_back(
+          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
+    } else {
+      valid_places.push_back(
+          paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
+    }
     FDINFO << "Lite::Backend enable_int8 option is ON ! Lite::Backend will "
            << "inference with int8 precision!" << std::endl;
   }
   if (option_.enable_fp16) {
-    paddle::lite_api::MobileConfig check_fp16_config;
-    // Determine whether the device supports the FP16
-    // instruction set (or whether it is an arm device
-    // of the armv8.2 architecture)
-    supported_fp16_ = check_fp16_config.check_fp16_valid();
-    if (supported_fp16_) {
+    if(option_.enable_xpu){
       valid_places.push_back(
-          paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
-      FDINFO << "Your device is supported fp16 ! Lite::Backend will "
-             << "inference with fp16 precision!" << std::endl;
+          paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
     } else {
-      FDWARNING << "This device is not supported fp16, will skip fp16 option.";
+      paddle::lite_api::MobileConfig check_fp16_config;
+      // Determine whether the device supports the FP16
+      // instruction set (or whether it is an arm device
+      // of the armv8.2 architecture)
+      supported_fp16_ = check_fp16_config.check_fp16_valid();
+      if (supported_fp16_) {
+        valid_places.push_back(
+            paddle::lite_api::Place{TARGET(kARM), PRECISION(kFP16)});
+        FDINFO << "The device supports FP16, Lite::Backend will inference with FP16 precision." << std::endl;
+      } else {
+        FDWARNING << "The device doesn't support FP16, will fallback to FP32.";
+      }
     }
   }
   if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
@@ -81,8 +90,24 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
     valid_places.push_back(
         paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
   }
-  valid_places.push_back(
-      paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
+  if(option_.enable_xpu){
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)});
+    config_.set_xpu_dev_per_thread(option_.device_id);
+    config_.set_xpu_workspace_l3_size_per_thread(option_.xpu_l3_workspace_size);
+    config_.set_xpu_l3_cache_method(option_.xpu_l3_workspace_size, option_.xpu_locked);
+    config_.set_xpu_conv_autotune(option_.xpu_autotune, option_.xpu_autotune_file);
+    config_.set_xpu_multi_encoder_method(option_.xpu_precision, option_.xpu_adaptive_seqlen);
+    if (option_.xpu_enable_multi_stream) {
+      config_.enable_xpu_multi_stream();
+    }
+  } else {
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
+  }
   config_.set_valid_places(valid_places);
   if (option_.threads > 0) {
     config_.set_threads(option_.threads);
@@ -160,7 +185,9 @@ bool LiteBackend::InitFromPaddle(const std::string& model_file,
     auto shape = tensor->shape();
     info.shape.assign(shape.begin(), shape.end());
     info.name = output_names[i];
-    info.dtype = LiteDataTypeToFD(tensor->precision());
+    if(!option_.enable_xpu){
+      info.dtype = LiteDataTypeToFD(tensor->precision());
+    }
     outputs_desc_.emplace_back(info);
   }
@@ -239,6 +266,9 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
   outputs->resize(outputs_desc_.size());
   for (size_t i = 0; i < outputs_desc_.size(); ++i) {
     auto tensor = predictor_->GetOutput(i);
+    if(outputs_desc_[i].dtype != LiteDataTypeToFD(tensor->precision())){
+      outputs_desc_[i].dtype = LiteDataTypeToFD(tensor->precision());
+    }
     (*outputs)[i].Resize(tensor->shape(), outputs_desc_[i].dtype,
                          outputs_desc_[i].name);
     memcpy((*outputs)[i].MutableData(), tensor->data<void>(),
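
For reference, the snippet below is a rough standalone sketch of what the XPU branch of `LiteBackend::BuildOption` amounts to when driving the Paddle Lite C++ API directly, using the same `set_xpu_*` calls introduced above. The model paths and option values are placeholders (they mirror the `LiteBackendOption` defaults), and the exact header and linking setup depends on the Paddle Lite package bundled by FastDeploy.

```cpp
#include <string>
#include <vector>

#include "paddle_api.h"  // Paddle Lite C++ API header from the bundled lite-linux-x64-xpu package

int main() {
  namespace lite = paddle::lite_api;
  lite::CxxConfig config;
  config.set_model_file("model.pdmodel");    // placeholder model path
  config.set_param_file("model.pdiparams");  // placeholder params path

  // Prefer XPU kernels and fall back to x86 host kernels, as the backend does.
  std::vector<lite::Place> valid_places{
      lite::Place{TARGET(kXPU), PRECISION(kFloat)},
      lite::Place{TARGET(kX86), PRECISION(kFloat)}};
  config.set_valid_places(valid_places);

  // XPU-specific knobs, mirroring the LiteBackendOption defaults above.
  config.set_xpu_dev_per_thread(0);
  config.set_xpu_workspace_l3_size_per_thread(0xfffc00);
  config.set_xpu_l3_cache_method(0xfffc00, /*locked=*/false);
  config.set_xpu_conv_autotune(true, "");
  config.set_xpu_multi_encoder_method("int16", /*adaptive_seqlen=*/false);

  auto predictor = lite::CreatePaddlePredictor<lite::CxxConfig>(config);
  (void)predictor;  // run inference via predictor->GetInput()/Run()/GetOutput()
  return 0;
}
```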


@@ -45,6 +45,15 @@ struct LiteBackendOption {
   // Such as fp16, different device target (kARM/kXPU/kNPU/...)
   std::string nnadapter_subgraph_partition_config_path = "";
   bool enable_timvx = false;
+  bool enable_xpu = false;
+  int device_id = 0;
+  int xpu_l3_workspace_size = 0xfffc00;
+  bool xpu_locked = false;
+  bool xpu_autotune = true;
+  std::string xpu_autotune_file = "";
+  std::string xpu_precision = "int16";
+  bool xpu_adaptive_seqlen = false;
+  bool xpu_enable_multi_stream = false;
 };
 // Convert data type from paddle lite to fastdeploy


@@ -62,6 +62,9 @@ std::string Str(const Device& d) {
     case Device::TIMVX:
       out = "Device::TIMVX";
       break;
+    case Device::XPU:
+      out = "Device::XPU";
+      break;
     default:
       out = "Device::UNKOWN";
   }
@@ -82,6 +85,9 @@ std::ostream& operator<<(std::ostream& out,const Device& d){
     case Device::TIMVX:
       out << "Device::TIMVX";
       break;
+    case Device::XPU:
+      out << "Device::XPU";
+      break;
     default:
       out << "Device::UNKOWN";
   }


@@ -22,7 +22,7 @@
 namespace fastdeploy {
-enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU, TIMVX};
+enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU, TIMVX, XPU};
 FASTDEPLOY_DECL std::string Str(const Device& d);


@@ -51,6 +51,7 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
   bool use_ipu = (runtime_option.device == Device::IPU);
   bool use_rknpu = (runtime_option.device == Device::RKNPU);
   bool use_timvx = (runtime_option.device == Device::TIMVX);
+  bool use_xpu = (runtime_option.device == Device::XPU);
   if (use_gpu) {
     if (!IsSupported(valid_gpu_backends, runtime_option.backend)) {
@@ -67,6 +68,11 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
       FDERROR << "The valid timvx backends of model " << ModelName() << " are " << Str(valid_timvx_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
       return false;
     }
+  } else if (use_xpu) {
+    if (!IsSupported(valid_xpu_backends, runtime_option.backend)) {
+      FDERROR << "The valid xpu backends of model " << ModelName() << " are " << Str(valid_xpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
+      return false;
+    }
   } else if(use_ipu) {
     if (!IsSupported(valid_ipu_backends, runtime_option.backend)) {
       FDERROR << "The valid ipu backends of model " << ModelName() << " are " << Str(valid_ipu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
@@ -102,6 +108,8 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
     return CreateRKNPUBackend();
   } else if (runtime_option.device == Device::TIMVX) {
     return CreateTimVXBackend();
+  } else if (runtime_option.device == Device::XPU) {
+    return CreateXPUBackend();
   } else if (runtime_option.device == Device::IPU) {
 #ifdef WITH_IPU
     return CreateIpuBackend();
@@ -111,7 +119,7 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
     return false;
 #endif
   }
-  FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX now." << std::endl;
+  FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/XPU now." << std::endl;
   return false;
 }
@@ -225,6 +233,29 @@ bool FastDeployModel::CreateTimVXBackend() {
   return false;
 }
+bool FastDeployModel::CreateXPUBackend() {
+  if (valid_xpu_backends.size() == 0) {
+    FDERROR << "There's no valid xpu backends for model: " << ModelName()
+            << std::endl;
+    return false;
+  }
+  for (size_t i = 0; i < valid_xpu_backends.size(); ++i) {
+    if (!IsBackendAvailable(valid_xpu_backends[i])) {
+      continue;
+    }
+    runtime_option.backend = valid_xpu_backends[i];
+    runtime_ = std::unique_ptr<Runtime>(new Runtime());
+    if (!runtime_->Init(runtime_option)) {
+      return false;
+    }
+    runtime_initialized_ = true;
+    return true;
+  }
+  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
+  return false;
+}
 bool FastDeployModel::CreateIpuBackend() {
   if (valid_ipu_backends.size() == 0) {
     FDERROR << "There's no valid ipu backends for model: " << ModelName()


@@ -45,6 +45,9 @@ class FASTDEPLOY_DECL FastDeployModel {
   /** Model's valid timvx backends. This member defined all the timvx backends have successfully tested for the model
    */
   std::vector<Backend> valid_timvx_backends = {};
+  /** Model's valid KunlunXin xpu backends. This member defined all the KunlunXin xpu backends have successfully tested for the model
+   */
+  std::vector<Backend> valid_xpu_backends = {};
   /** Model's valid hardware backends. This member defined all the gpu backends have successfully tested for the model
    */
   std::vector<Backend> valid_rknpu_backends = {};
@@ -143,6 +146,7 @@ class FASTDEPLOY_DECL FastDeployModel {
   bool CreateIpuBackend();
   bool CreateRKNPUBackend();
   bool CreateTimVXBackend();
+  bool CreateXPUBackend();
   std::shared_ptr<Runtime> runtime_;
   bool runtime_initialized_ = false;
fastdeploy/pybind/runtime.cc Normal file → Executable file
@@ -23,6 +23,7 @@ void BindRuntime(pybind11::module& m) {
.def("use_gpu", &RuntimeOption::UseGpu) .def("use_gpu", &RuntimeOption::UseGpu)
.def("use_cpu", &RuntimeOption::UseCpu) .def("use_cpu", &RuntimeOption::UseCpu)
.def("use_rknpu2", &RuntimeOption::UseRKNPU2) .def("use_rknpu2", &RuntimeOption::UseRKNPU2)
.def("use_xpu", &RuntimeOption::UseXpu)
.def("set_external_stream", &RuntimeOption::SetExternalStream) .def("set_external_stream", &RuntimeOption::SetExternalStream)
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
@@ -100,7 +101,21 @@ void BindRuntime(pybind11::module& m) {
.def_readwrite("ipu_available_memory_proportion", .def_readwrite("ipu_available_memory_proportion",
&RuntimeOption::ipu_available_memory_proportion) &RuntimeOption::ipu_available_memory_proportion)
.def_readwrite("ipu_enable_half_partial", .def_readwrite("ipu_enable_half_partial",
&RuntimeOption::ipu_enable_half_partial); &RuntimeOption::ipu_enable_half_partial)
.def_readwrite("xpu_l3_workspace_size",
&RuntimeOption::xpu_l3_workspace_size)
.def_readwrite("xpu_locked",
&RuntimeOption::xpu_locked)
.def_readwrite("xpu_autotune",
&RuntimeOption::xpu_autotune)
.def_readwrite("xpu_autotune_file",
&RuntimeOption::xpu_autotune_file)
.def_readwrite("xpu_precision",
&RuntimeOption::xpu_precision)
.def_readwrite("xpu_adaptive_seqlen",
&RuntimeOption::xpu_adaptive_seqlen)
.def_readwrite("xpu_enable_multi_stream",
&RuntimeOption::xpu_enable_multi_stream);
pybind11::class_<TensorInfo>(m, "TensorInfo") pybind11::class_<TensorInfo>(m, "TensorInfo")
.def_readwrite("name", &TensorInfo::name) .def_readwrite("name", &TensorInfo::name)
fastdeploy/runtime.cc Normal file → Executable file
@@ -236,7 +236,26 @@ void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
void RuntimeOption::UseTimVX() { void RuntimeOption::UseTimVX() {
enable_timvx = true; enable_timvx = true;
device = Device::TIMVX; device = Device::TIMVX;
UseLiteBackend(); }
void RuntimeOption::UseXpu(int xpu_id,
int l3_workspace_size,
bool locked,
bool autotune,
const std::string &autotune_file,
const std::string &precision,
bool adaptive_seqlen,
bool enable_multi_stream) {
enable_xpu = true;
device_id = xpu_id;
xpu_l3_workspace_size = l3_workspace_size;
xpu_locked=locked;
xpu_autotune=autotune;
xpu_autotune_file=autotune_file;
xpu_precision = precision;
xpu_adaptive_seqlen=adaptive_seqlen;
xpu_enable_multi_stream=enable_multi_stream;
device = Device::XPU;
} }
void RuntimeOption::SetExternalStream(void* external_stream) { void RuntimeOption::SetExternalStream(void* external_stream) {
@@ -532,8 +551,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
FDINFO << "Runtime initialized with Backend::OPENVINO in " FDINFO << "Runtime initialized with Backend::OPENVINO in "
<< Str(option.device) << "." << std::endl; << Str(option.device) << "." << std::endl;
} else if (option.backend == Backend::LITE) { } else if (option.backend == Backend::LITE) {
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX, FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX || option.device == Device::XPU,
"Backend::LITE only supports Device::CPU/Device::TIMVX."); "Backend::LITE only supports Device::CPU/Device::TIMVX/Device::XPU.");
CreateLiteBackend(); CreateLiteBackend();
FDINFO << "Runtime initialized with Backend::LITE in " << Str(option.device) FDINFO << "Runtime initialized with Backend::LITE in " << Str(option.device)
<< "." << std::endl; << "." << std::endl;
@@ -784,6 +803,16 @@ void Runtime::CreateLiteBackend() {
lite_option.nnadapter_subgraph_partition_config_path = lite_option.nnadapter_subgraph_partition_config_path =
option.lite_nnadapter_subgraph_partition_config_path; option.lite_nnadapter_subgraph_partition_config_path;
lite_option.enable_timvx = option.enable_timvx; lite_option.enable_timvx = option.enable_timvx;
lite_option.enable_xpu = option.enable_xpu;
lite_option.device_id = option.device_id;
lite_option.xpu_l3_workspace_size = option.xpu_l3_workspace_size;
lite_option.xpu_locked = option.xpu_locked;
lite_option.xpu_autotune = option.xpu_autotune;
lite_option.xpu_autotune_file = option.xpu_autotune_file;
lite_option.xpu_precision = option.xpu_precision;
lite_option.xpu_adaptive_seqlen = option.xpu_adaptive_seqlen;
lite_option.xpu_enable_multi_stream = option.xpu_enable_multi_stream;
FDASSERT(option.model_format == ModelFormat::PADDLE, FDASSERT(option.model_format == ModelFormat::PADDLE,
"LiteBackend only support model format of ModelFormat::PADDLE"); "LiteBackend only support model format of ModelFormat::PADDLE");
backend_ = utils::make_unique<LiteBackend>(); backend_ = utils::make_unique<LiteBackend>();
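To show how the xpu_* fields above reach the Lite backend, a short hedged sketch; SetModelPath() is assumed from the existing RuntimeOption API, and the model file names are placeholders.

    #include "fastdeploy/runtime.h"

    // Hypothetical helper; paths are placeholders.
    void InitXpuRuntime() {
      fastdeploy::RuntimeOption option;
      option.SetModelPath("model.pdmodel", "model.pdiparams");  // LITE requires ModelFormat::PADDLE
      option.UseXpu(0, 0xfffc00);   // card 0, ~16 MB L3 workspace
      option.UseLiteBackend();      // Device::XPU now passes the FDASSERT above
      fastdeploy::Runtime runtime;
      runtime.Init(option);         // CreateLiteBackend() copies the xpu_* fields into lite_option
    }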
@@ -102,6 +102,37 @@ struct FASTDEPLOY_DECL RuntimeOption {
/// Use TimVX to inference /// Use TimVX to inference
void UseTimVX(); void UseTimVX();
///
/// \brief Turn on KunlunXin XPU.
///
/// \param xpu_id The XPU card to use (default is 0).
/// \param l3_workspace_size The size of the L3 cache workspace to allocate
/// on the device; the maximum is 16 MB.
/// \param locked Whether the allocated L3 cache is locked. If false, the L3
/// cache is not locked and can be shared by multiple models; models
/// sharing the L3 cache are executed sequentially on the card.
/// \param autotune Whether to autotune the conv operators in the model. If
/// true, the first time a conv operator with a given shape is executed,
/// a better algorithm is automatically searched for and reused for
/// subsequent conv operators of the same shape.
/// \param autotune_file Path of the autotune file. If specified, the
/// algorithms recorded in the file are used and autotune is not
/// performed again.
/// \param precision Computation precision of multi_encoder (default "int16").
/// \param adaptive_seqlen Whether the input of multi_encoder is variable length.
/// \param enable_multi_stream Whether to enable XPU multi-stream execution.
///
void UseXpu(int xpu_id = 0,
int l3_workspace_size = 0xfffc00,
bool locked = false,
bool autotune = true,
const std::string& autotune_file = "",
const std::string& precision = "int16",
bool adaptive_seqlen = false,
bool enable_multi_stream = false);
void SetExternalStream(void* external_stream); void SetExternalStream(void* external_stream);
/* /*
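A sketch of calling the new UseXpu() overload declared above with every argument spelled out, mirroring the parameter documentation; the autotune file path is a placeholder.

    #include "fastdeploy/runtime.h"

    // Hypothetical helper; values follow the documented defaults except where noted.
    void ConfigureXpuVerbose(fastdeploy::RuntimeOption* option) {
      option->UseXpu(/*xpu_id=*/0,
                     /*l3_workspace_size=*/0xfffc00,           // documented 16 MB maximum
                     /*locked=*/false,                         // share the L3 cache between models
                     /*autotune=*/true,
                     /*autotune_file=*/"conv_autotune.cache",  // placeholder path
                     /*precision=*/"int16",
                     /*adaptive_seqlen=*/false,
                     /*enable_multi_stream=*/false);
    }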
@@ -354,6 +385,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
std::string lite_optimized_model_dir = ""; std::string lite_optimized_model_dir = "";
std::string lite_nnadapter_subgraph_partition_config_path = ""; std::string lite_nnadapter_subgraph_partition_config_path = "";
bool enable_timvx = false; bool enable_timvx = false;
bool enable_xpu = false;
// ======Only for Trt Backend======= // ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_max_shape; std::map<std::string, std::vector<int32_t>> trt_max_shape;
@@ -386,6 +418,15 @@ struct FASTDEPLOY_DECL RuntimeOption {
fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ = fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ =
fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO; fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
// ======Only for XPU Backend=======
int xpu_l3_workspace_size = 0xfffc00;
bool xpu_locked = false;
bool xpu_autotune = true;
std::string xpu_autotune_file = "";
std::string xpu_precision = "int16";
bool xpu_adaptive_seqlen = false;
bool xpu_enable_multi_stream = false;
std::string model_file = ""; // Path of model file std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty std::string params_file = ""; // Path of parameters file, can be empty
// format of input model // format of input model
@@ -29,6 +29,7 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file,
Backend::LITE}; Backend::LITE};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
valid_timvx_backends = {Backend::LITE}; valid_timvx_backends = {Backend::LITE};
valid_xpu_backends = {Backend::LITE};
valid_ipu_backends = {Backend::PDINFER}; valid_ipu_backends = {Backend::PDINFER};
} else if (model_format == ModelFormat::ONNX) { } else if (model_format == ModelFormat::ONNX) {
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO}; valid_cpu_backends = {Backend::ORT, Backend::OPENVINO};
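A hedged end-to-end sketch for the classification model above; fastdeploy::vision::classification::PaddleClasModel and Initialized() come from the existing FastDeploy API rather than this diff, and the file names are placeholders. The YOLOv5 detection model further down follows the same pattern, since it also lists only Backend::LITE in valid_xpu_backends.

    #include <iostream>
    #include "fastdeploy/vision.h"  // assumed umbrella header for the vision models

    int main() {
      fastdeploy::RuntimeOption option;
      option.UseXpu();  // Backend::LITE is the only entry in valid_xpu_backends
      fastdeploy::vision::classification::PaddleClasModel model(
          "inference.pdmodel", "inference.pdiparams", "inference_cls.yaml", option);
      if (!model.Initialized()) {
        std::cerr << "Failed to initialize PaddleClasModel on XPU." << std::endl;
        return -1;
      }
      return 0;
    }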
@@ -72,10 +72,11 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
} }
// Set Anchor // Set Anchor
void SetAnchor(std::vector<int> anchors,int anchor_per_branch){ void SetAnchor(std::vector<int> anchors, int anchor_per_branch) {
anchors_ = anchors; anchors_ = anchors;
anchor_per_branch_ = anchor_per_branch; anchor_per_branch_ = anchor_per_branch;
}; }
private: private:
std::vector<int> anchors_ = {10, 13, 16, 30, 33, 23, 30, 61, 62, std::vector<int> anchors_ = {10, 13, 16, 30, 33, 23, 30, 61, 62,
45, 59, 119, 116, 90, 156, 198, 373, 326}; 45, 59, 119, 116, 90, 156, 198, 373, 326};
@@ -31,4 +31,4 @@ int NMS(int valid_count, std::vector<float>& output_locations,
} // namespace detection } // namespace detection
} // namespace vision } // namespace vision
} // namespace fastdeploy } // namespace fastdeploy
@@ -27,6 +27,7 @@ YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file,
} else { } else {
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE}; valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_xpu_backends = {Backend::LITE};
valid_timvx_backends = {Backend::LITE}; valid_timvx_backends = {Backend::LITE};
} }
runtime_option = custom_option; runtime_option = custom_option;
@@ -185,7 +185,7 @@ class FASTDEPLOY_DECL PaddleYOLOv5 : public PPDetBase {
const ModelFormat& model_format = ModelFormat::PADDLE) const ModelFormat& model_format = ModelFormat::PADDLE)
: PPDetBase(model_file, params_file, config_file, custom_option, : PPDetBase(model_file, params_file, config_file, custom_option,
model_format) { model_format) {
valid_cpu_backends = {Backend::ORT,Backend::PDINFER}; valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
initialized = Initialize(); initialized = Initialize();
} }
@@ -201,7 +201,7 @@ class FASTDEPLOY_DECL PaddleYOLOv6 : public PPDetBase {
const ModelFormat& model_format = ModelFormat::PADDLE) const ModelFormat& model_format = ModelFormat::PADDLE)
: PPDetBase(model_file, params_file, config_file, custom_option, : PPDetBase(model_file, params_file, config_file, custom_option,
model_format) { model_format) {
valid_cpu_backends = {Backend::OPENVINO, Backend::ORT,Backend::PDINFER}; valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
initialized = Initialize(); initialized = Initialize();
} }
@@ -217,7 +217,7 @@ class FASTDEPLOY_DECL PaddleYOLOv7 : public PPDetBase {
const ModelFormat& model_format = ModelFormat::PADDLE) const ModelFormat& model_format = ModelFormat::PADDLE)
: PPDetBase(model_file, params_file, config_file, custom_option, : PPDetBase(model_file, params_file, config_file, custom_option,
model_format) { model_format) {
valid_cpu_backends = {Backend::ORT,Backend::PDINFER}; valid_cpu_backends = {Backend::ORT, Backend::PDINFER};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
initialized = Initialize(); initialized = Initialize();
} }
@@ -245,6 +245,34 @@ class RuntimeOption:
return return
return self._option.use_gpu(device_id) return self._option.use_gpu(device_id)
def use_xpu(self,
device_id=0,
l3_workspace_size=16 * 1024 * 1024,
locked=False,
autotune=True,
autotune_file="",
precision="int16",
adaptive_seqlen=False,
enable_multi_stream=False):
"""Inference with XPU
:param device_id: (int)The index of XPU will be used for inference, default 0
:param l3_workspace_size: (int)The size of the video memory allocated by the l3 cache, the maximum is 16M, default 16M
:param locked: (bool)Whether the allocated L3 cache can be locked. If false, it means that the L3 cache is not locked,
and the allocated L3 cache can be shared by multiple models, and multiple models
:param autotune: (bool)Whether to autotune the conv operator in the model.
If true, when the conv operator of a certain dimension is executed for the first time,
it will automatically search for a better algorithm to improve the performance of subsequent conv operators of the same dimension.
:param autotune_file: (str)Specify the path of the autotune file. If autotune_file is specified,
the algorithm specified in the file will be used and autotune will not be performed again.
:param precision: (str)Calculation accuracy of multi_encoder
:param adaptive_seqlen: (bool)adaptive_seqlen Is the input of multi_encoder variable length
:param enable_multi_stream: (bool)Whether to enable the multi stream of xpu.
"""
return self._option.use_xpu(device_id, l3_workspace_size, locked,
autotune, autotune_file, precision,
adaptive_seqlen, enable_multi_stream)
def use_cpu(self): def use_cpu(self):
"""Inference with CPU """Inference with CPU
""" """
@@ -65,11 +65,13 @@ setup_configs["ENABLE_POROS_BACKEND"] = os.getenv("ENABLE_POROS_BACKEND",
"OFF") "OFF")
setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF") setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF")
setup_configs["ENABLE_LITE_BACKEND"] = os.getenv("ENABLE_LITE_BACKEND", "OFF") setup_configs["ENABLE_LITE_BACKEND"] = os.getenv("ENABLE_LITE_BACKEND", "OFF")
setup_configs["PADDLELITE_URL"] = os.getenv("PADDLELITE_URL", "OFF")
setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "OFF") setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "OFF")
setup_configs["ENABLE_FLYCV"] = os.getenv("ENABLE_FLYCV", "OFF") setup_configs["ENABLE_FLYCV"] = os.getenv("ENABLE_FLYCV", "OFF")
setup_configs["ENABLE_TEXT"] = os.getenv("ENABLE_TEXT", "OFF") setup_configs["ENABLE_TEXT"] = os.getenv("ENABLE_TEXT", "OFF")
setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF") setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF")
setup_configs["WITH_IPU"] = os.getenv("WITH_IPU", "OFF") setup_configs["WITH_IPU"] = os.getenv("WITH_IPU", "OFF")
setup_configs["WITH_XPU"] = os.getenv("WITH_XPU", "OFF")
setup_configs["BUILD_ON_JETSON"] = os.getenv("BUILD_ON_JETSON", "OFF") setup_configs["BUILD_ON_JETSON"] = os.getenv("BUILD_ON_JETSON", "OFF")
setup_configs["TRT_DIRECTORY"] = os.getenv("TRT_DIRECTORY", "UNDEFINED") setup_configs["TRT_DIRECTORY"] = os.getenv("TRT_DIRECTORY", "UNDEFINED")
setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY", setup_configs["CUDA_DIRECTORY"] = os.getenv("CUDA_DIRECTORY",
@@ -78,10 +80,12 @@ setup_configs["LIBRARY_NAME"] = PACKAGE_NAME
setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main" setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main"
setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "") setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "")
setup_configs["ORT_DIRECTORY"] = os.getenv("ORT_DIRECTORY", "") setup_configs["ORT_DIRECTORY"] = os.getenv("ORT_DIRECTORY", "")
setup_configs["PADDLEINFERENCE_DIRECTORY"] = os.getenv("PADDLEINFERENCE_DIRECTORY", "") setup_configs["PADDLEINFERENCE_DIRECTORY"] = os.getenv(
"PADDLEINFERENCE_DIRECTORY", "")
setup_configs["RKNN2_TARGET_SOC"] = os.getenv("RKNN2_TARGET_SOC", "") setup_configs["RKNN2_TARGET_SOC"] = os.getenv("RKNN2_TARGET_SOC", "")
if setup_configs["RKNN2_TARGET_SOC"] != "" or setup_configs["BUILD_ON_JETSON"] != "OFF": if setup_configs["RKNN2_TARGET_SOC"] != "" or setup_configs[
"BUILD_ON_JETSON"] != "OFF":
REQUIRED_PACKAGES = REQUIRED_PACKAGES.replace("opencv-python", "") REQUIRED_PACKAGES = REQUIRED_PACKAGES.replace("opencv-python", "")
if setup_configs["WITH_GPU"] == "ON" or setup_configs[ if setup_configs["WITH_GPU"] == "ON" or setup_configs[