[Backend] Add RKNPU2 backend support (#456)

* 10-29/14:05
* Add cmake
* Add the rknpu2 backend

* 10-29/14:43
* Add RKNPU code to Runtime fd_type

* 10-29/15:02
* Add ppseg RKNPU2 inference code

* 10-29/15:46
* Add the ppseg RKNPU2 C++ example code

* 10-29/15:51
* Add README documentation

* 10-29/15:51
* Revise some comments and variable names as requested

* 10-29/15:51
* Fix a bug where, after the renaming, some code in the .cc files still used the old function names

* 10-29/22:32
* str(Device::NPU) now prints NPU instead of UNKNOWN
* Fix the comment formatting in the runtime files
* Add ENABLE_RKNPU2_BACKEND to the Building Summary output
* Add rknpu2 support to pybind
* Add the Python build option
* Add the PPSeg Python code
* Add and update various docs

* 10-30/14:11
* Try to fix the errors triggered when building with CUDA

* 10-30/19:27
* Move CpuName and CoreMask to a different level
* Restructure the ppseg rknn inference code
* Images are now downloaded from the network

* 10-30/19:39
* Update docs

* 10-30/19:39
* Update docs
* Update the function naming in the ppseg rknpu2 example
* Merge the ppseg rknpu2 example into a single .cc file
* Fix a logic error in the disable_normalize_and_permute code
* Remove unused parameters from the rknpu2 initialization

* 10-30/19:39
* Try to reset the Python code

* 10-30/10:16
* rknpu2_config.h no longer includes the rknn_api header, preventing import errors

* 10-31/14:31
* Update pybind to support the latest rknpu2 backend
* Support ppseg Python inference again
* Move CpuName and CoreMask to a different level

* 10-31/15:35
* Try to fix the rknpu2 import error

* 10-31/19:00
* Add the RKNPU2 model export code and its documentation
* Fix a large number of documentation errors

* 10-31/19:00
* RKNN2_TARGET_SOC no longer needs to be set again after the fastdeploy repository has been built

* 10-31/19:26
* Fix some incorrect documentation

* 10-31/19:26
* Restore parts that were deleted by mistake
* Fix various documentation errors
* Fix FastDeploy.cmake printing the wrong message when RKNN2_TARGET_SOC is set incorrectly
* Fix Chinese comments left in rknpu2_backend.cc

* 10-31/20:45
* Remove unused comments

* 10-31/20:45
* Rename Device::NPU to Device::RKNPU as requested; the hardware will share valid_hardware_backends
* Remove unused comments and debug code

* 11-01/09:45
* Update variable naming

* 11-01/10:16
* Update some docs and the function naming

Co-authored-by: Jason <jiangjiajun@baidu.com>
Author: Zheng_Bicheng
Date: 2022-11-01 11:14:05 +08:00
Committed by: GitHub
parent bb00e0757e
commit 4ffcfbe726
37 changed files with 1567 additions and 74 deletions


@@ -58,6 +58,7 @@ option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF)
option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF)
option(ENABLE_POROS_BACKEND "Whether to enable poros backend." OFF)
option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF)
option(ENABLE_RKNPU2_BACKEND "Whether to enable RKNPU2 backend." OFF)
option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
option(ENABLE_VISION "Whether to enable vision models usage." OFF)
option(ENABLE_TEXT "Whether to enable text models usage." OFF)
@@ -164,13 +165,14 @@ file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fas
file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu/rknpu2/*.cc)
file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pipeline/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cu)
file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc)
file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc)
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${DEPLOY_PIPELINE_SRCS})
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS})
set(DEPEND_LIBS "")
@@ -227,6 +229,13 @@ if(ENABLE_OPENVINO_BACKEND)
include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake)
endif()
if(ENABLE_RKNPU2_BACKEND)
add_definitions(-DENABLE_RKNPU2_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_RKNPU2_SRCS})
include(${PROJECT_SOURCE_DIR}/cmake/rknpu2.cmake)
list(APPEND DEPEND_LIBS ${RKNN_RT_LIB})
endif()
if(ENABLE_POROS_BACKEND)
set(CMAKE_CXX_STANDARD 14)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)


@@ -2,6 +2,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.8)
set(WITH_GPU @WITH_GPU@)
set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
set(ENABLE_RKNPU2_BACKEND @ENABLE_RKNPU2_BACKEND@)
set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@)
set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
@@ -27,6 +28,7 @@ set(LIBRARY_NAME @LIBRARY_NAME@)
set(OPENCV_DIRECTORY "@OPENCV_DIRECTORY@")
set(ORT_DIRECTORY "@ORT_DIRECTORY@")
set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
set(FASTDEPLOY_LIBS "")
set(FASTDEPLOY_INCS "")
@@ -88,6 +90,18 @@ if(ENABLE_OPENVINO_BACKEND)
list(APPEND FASTDEPLOY_LIBS ${OPENVINO_LIBS})
endif()
if(ENABLE_RKNPU2_BACKEND)
if(RKNN2_TARGET_SOC STREQUAL "RK356X")
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK356X/lib/librknn_api.so)
elseif (RKNN2_TARGET_SOC STREQUAL "RK3588")
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/librknn_api.so)
else ()
message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588")
endif()
message(STATUS "The path of RKNPU2 is ${RKNPU2_LIB}.")
list(APPEND FASTDEPLOY_LIBS ${RKNPU2_LIB})
endif()
if(ENABLE_LITE_BACKEND)
set(LITE_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${PADDLELITE_FILENAME})
if(ANDROID)
@@ -234,6 +248,7 @@ message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
message(STATUS " WITH_GPU : ${WITH_GPU}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")

cmake/rknpu2.cmake (new file)

@@ -0,0 +1,26 @@
# get RKNPU2_URL
set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
set(RKNPU2_VERSION "1.4.0")
set(RKNPU2_FILE "rknpu2_runtime-linux-x64-${RKNPU2_VERSION}.tgz")
set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}")
# download_and_decompress
download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE} ${THIRD_PARTY_PATH}/install/)
# set path
set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
else ()
message(FATAL_ERROR "[rknpu2.cmake] Only support build rknpu2 in Linux")
endif ()
if (EXISTS ${RKNPU_RUNTIME_PATH})
set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so)
include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include)
else ()
message(FATAL_ERROR "[rknpu2.cmake] download_and_decompress rknpu2_runtime error")
endif ()


@@ -31,6 +31,7 @@ function(fastdeploy_summary)
message(STATUS " FastDeploy version : ${FASTDEPLOY_VERSION}")
message(STATUS " Paddle2ONNX version : ${PADDLE2ONNX_VERSION}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")


@@ -10,15 +10,17 @@
## FastDeploy build options
| Option | Description |
|:------------------------|:--------------------------------------------------------------------------|
| ENABLE_ORT_BACKEND | OFF by default; whether to build with the ONNX Runtime backend (recommended ON for CPU/GPU) |
| ENABLE_PADDLE_BACKEND | OFF by default; whether to build with the Paddle Inference backend (recommended ON for CPU/GPU) |
| ENABLE_LITE_BACKEND | OFF by default; whether to build with the Paddle Lite backend (must be ON when building the Android library) |
| ENABLE_RKNPU2_BACKEND | OFF by default; whether to build with the RKNPU2 backend (recommended ON for RK3588/RK3568/RK3566) |
| ENABLE_TRT_BACKEND | OFF by default; whether to build with the TensorRT backend (recommended ON for GPU) |
| ENABLE_OPENVINO_BACKEND | OFF by default; whether to build with the OpenVINO backend (recommended ON for CPU) |
| ENABLE_VISION | OFF by default; whether to build the vision model deployment module |
| ENABLE_TEXT | OFF by default; whether to build the text (NLP) model deployment module |
| WITH_GPU | OFF by default; must be ON when deploying on GPU |
| RKNN2_TARGET_SOC | Only needed when ENABLE_RKNPU2_BACKEND is ON. No default value; must be set to RK3588 or RK356X, otherwise the build fails (see the example after the table) |
| CUDA_DIRECTORY | /usr/local/cuda by default; path to CUDA (>=11.2) when deploying on GPU |
| TRT_DIRECTORY | Must point to the TensorRT (>=8.4) installation when the TensorRT backend is enabled |
| ORT_DIRECTORY | Path to a local ONNX Runtime library when the ONNX Runtime backend is enabled; if unset, the build downloads ONNX Runtime automatically |
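As an illustration of how these switches combine, the cmake invocation below mirrors the RKNPU2 build guide added in this change (the install prefix is only an example):
```bash
cmake .. -DENABLE_ORT_BACKEND=ON \
         -DENABLE_RKNPU2_BACKEND=ON \
         -DENABLE_VISION=ON \
         -DRKNN2_TARGET_SOC=RK3588 \
         -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3
make -j8
make install
```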


@@ -0,0 +1,102 @@
# Building the RKNPU2 (Second-Generation Rockchip NPU) Deployment Library
## Before you start
FastDeploy has preliminary support for RKNPU2 deployment. If you run into a bug while using it, please file an issue.
## Overview
The backends FastDeploy currently supports on RK platforms are listed below:
| Backend | Platform | Supported model format | Notes |
|:------------------|:---------------------|:-------|:-------------------------------------------|
| ONNX&nbsp;Runtime | RK356X <br> RK3588 | ONNX | Controlled by the build switch `ENABLE_ORT_BACKEND`, OFF by default |
| RKNPU2 | RK356X <br> RK3588 | RKNN | Controlled by the build switch `ENABLE_RKNPU2_BACKEND`, OFF by default |
## Building and installing the C++ SDK
RKNPU2 can only be built on Linux; the tutorial below assumes a Linux environment.
### Update the driver and install the build-time dependencies
Before running any code, install the latest RKNPU driver (currently version 1.4.0). To simplify installation, a one-click install script is provided.
**Method 1: install via the script**
```bash
# Download and extract rknpu2_device_install_1.4.0
wget https://bj.bcebos.com/fastdeploy/third_libs/rknpu2_device_install_1.4.0.zip
unzip rknpu2_device_install_1.4.0.zip
cd rknpu2_device_install_1.4.0
# On RK3588 run:
sudo rknn_install_rk3588.sh
# On RK356X run:
sudo rknn_install_rk356X.sh
```
**Method 2: install via gitee**
```bash
# Install the required packages
sudo apt update -y
sudo apt install -y python3
sudo apt install -y python3-dev
sudo apt install -y python3-pip
sudo apt install -y gcc
sudo apt install -y python3-opencv
sudo apt install -y python3-numpy
sudo apt install -y cmake
# Download rknpu2
# On RK3588 run:
git clone https://gitee.com/mirrors_rockchip-linux/rknpu2.git
sudo cp ./rknpu2/runtime/RK3588/Linux/librknn_api/aarch64/* /usr/lib
sudo cp ./rknpu2/runtime/RK3588/Linux/rknn_server/aarch64/usr/bin/* /usr/bin/
# On RK356X run:
git clone https://gitee.com/mirrors_rockchip-linux/rknpu2.git
sudo cp ./rknpu2/runtime/RK356X/Linux/librknn_api/aarch64/* /usr/lib
sudo cp ./rknpu2/runtime/RK356X/Linux/rknn_server/aarch64/usr/bin/* /usr/bin/
```
### Build the C++ SDK
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
# See the README for the full list of build options; only the key ones are shown here
# -DENABLE_ORT_BACKEND: whether to enable the ONNX Runtime backend, OFF by default
# -DENABLE_RKNPU2_BACKEND: whether to enable the RKNPU2 backend, OFF by default
# -DRKNN2_TARGET_SOC: the target board SoC; must be RK356X or RK3588 (case-sensitive)
cmake .. -DENABLE_ORT_BACKEND=ON \
-DENABLE_RKNPU2_BACKEND=ON \
-DENABLE_VISION=ON \
-DRKNN2_TARGET_SOC=RK3588 \
-DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3
make -j8
make install
```
### Build the Python SDK
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
cd python
export ENABLE_ORT_BACKEND=ON
export ENABLE_RKNPU2_BACKEND=ON
export ENABLE_VISION=ON
export RKNN2_TARGET_SOC=RK3588
python3 setup.py build
python3 setup.py bdist_wheel
cd dist
pip3 install fastdeploy_python-0.0.0-cp39-cp39-linux_aarch64.whl
```
## Deploying models
See the [RKNPU2 model deployment tutorial](../faq/rknpu2/rknpu2.md)


@@ -0,0 +1,48 @@
# Model Export Guide
## Overview
FastDeploy ships a simple integration of the onnx->rknn conversion flow. This tutorial uses the tools/export.py script to export models; a yaml config file has to be written before exporting.
Before converting, follow the [rknn_toolkit2 installation guide](./install_rknn_toolkit2.md) to verify that the environment is set up correctly.
## export.py parameters
| Parameter | Optional | Purpose |
|-----------------|------------|--------------------|
| verbose | Yes, defaults to True | Whether to print detailed information while converting the model |
| config_path | No | Path of the config file |
## The config file
### config yaml template
```yaml
model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
output_folder: ./
target_platform: RK3588
normalize:
  mean: [0.5,0.5,0.5]
  std: [0.5,0.5,0.5]
outputs: None
```
### config parameters
* model_path: path of the model to convert
* output_folder: folder the converted model is written to
* target_platform: the device the model will run on; must be RK3588 or RK3568
* normalize: the normalize operation configured on the NPU, with the two parameters std and mean (see the sketch right after this list for the external-normalize case)
  * std: if normalize is done outside the model, set this to [1/255,1/255,1/255]
  * mean: if normalize is done outside the model, set this to [0,0,0]
* outputs: list of output nodes; set to None to use the model's default outputs
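A minimal sketch of such an "external normalize" config, assuming a hypothetical model.onnx in the current directory (1/255 is written out as a decimal so yaml parses it as a number):
```yaml
model_path: ./model.onnx
output_folder: ./
target_platform: RK3588
normalize:
  mean: [0, 0, 0]
  std: [0.00392157, 0.00392157, 0.00392157]  # 1/255
outputs: None
```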
## How to convert a model
Run the following from the repository root:
```bash
python tools/export.py --config_path=./config.yaml
```
## Notes on model export
* Do not export models that contain softmax or argmax; these two operators currently have bugs, so apply them outside the model.


@@ -0,0 +1,49 @@
# Installing rknn_toolkit2
## Download rknn_toolkit2
There are generally two ways to download rknn_toolkit2:
* From the GitHub repository
The GitHub repository provides the stable releases of rknn_toolkit2:
```bash
git clone https://github.com/rockchip-linux/rknn-toolkit2.git
```
* From Baidu Netdisk
If a stable release of rknn_toolkit2 has a bug that blocks model deployment, a beta release can be downloaded from Baidu Netdisk instead. It installs the same way as the stable release.
```text
Link: https://eyun.baidu.com/s/3eTDMk6Y  Password: rknn
```
## Install rknn_toolkit2
rknn_toolkit2 has a few dependency quirks, so this section walks through the installation. Because it depends on some specific package versions, it is recommended to create a fresh conda virtual environment for it. Installing conda itself is widely documented, so we skip that and go straight to installing rknn_toolkit2.
### Install the required system packages
```bash
sudo apt-get install libxslt1-dev zlib1g zlib1g-dev libglib2.0-0 \
libsm6 libgl1-mesa-glx libprotobuf-dev gcc g++
```
### Set up the rknn_toolkit2 environment
```bash
# Create the virtual environment
conda create -n rknn2 python=3.6
conda activate rknn2
# rknn_toolkit2 pins a specific numpy version, so install numpy==1.16.6 first
pip install numpy==1.16.6
# Install rknn_toolkit2-1.3.0_11912b58-cp38-cp38-linux_x86_64.whl
cd ~/Downloads/rknn-toolkit2-master/packages
pip install rknn_toolkit2-1.3.0_11912b58-cp38-cp38-linux_x86_64.whl
```
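FastDeploy's export script builds on this toolkit. As a rough sketch of what a manual ONNX-to-RKNN conversion looks like with the rknn-toolkit2 Python API (the model path and normalization values are placeholders, and keyword arguments may differ slightly between toolkit versions):
```python
from rknn.api import RKNN

rknn = RKNN(verbose=True)
# Bake "divide by 255" normalization into the RKNN model;
# use mean 0 / std 1 here if you normalize outside the model instead.
rknn.config(mean_values=[[0, 0, 0]],
            std_values=[[255, 255, 255]],
            target_platform='rk3588')
assert rknn.load_onnx(model='./model.onnx') == 0, 'load_onnx failed'
assert rknn.build(do_quantization=False) == 0, 'build failed'
assert rknn.export_rknn('./model.rknn') == 0, 'export_rknn failed'
rknn.release()
```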
## Other documentation
- [ONNX to RKNN conversion guide](./export.md)


@@ -0,0 +1,64 @@
# RKNPU2 Model Deployment
## Converting ONNX models to RKNN models
ONNX models cannot drive the NPU in RK chips directly; they first have to be converted to RKNN models. See the [conversion guide](./export.md) for the details.
## Models already supported on RKNPU2
| Task | Model | Model version (tested) | Size | ONNX/RKNN supported | ONNX/RKNN latency (ms) |
|------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-----|---------------|-----------------|
| Detection | Picodet | [Picodet-s-npu](https://bj.bcebos.com/fastdeploy/models/rknn2/picodet_s_416_coco_npu_3588.tgz) | - | True/True | 454/177 |
| Segmentation | PP-LiteSeg | [PP_LiteSeg_T_STDC1_cityscapes](https://bj.bcebos.com/fastdeploy/models/rknn2/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_3588.tgz) | - | True/True | 6634/5598 |
| Segmentation | PP-HumanSegV2Lite | [portrait](https://bj.bcebos.com/fastdeploy/models/rknn2/portrait_pp_humansegv2_lite_256x144_inference_model_without_softmax_3588.tgz) | - | True/True | 456/266 |
| Segmentation | PP-HumanSegV2Lite | [human](https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz) | - | True/True | 496/256 |
| Face Detection | SCRFD | [SCRFD-2.5G-kps-640](https://bj.bcebos.com/fastdeploy/models/rknn2/scrfd_2.5g_bnkps_shape640x640.rknn) | - | True/True | 963/142 |
| Face Recognition | ArcFace | [ArcFace_r18](https://bj.bcebos.com/fastdeploy/models/rknn2/new_ms1mv3_arcface_r18.rknn) | - | True/True | 600/3 |
| Face Recognition | cosFace | [cosFace_r18](https://bj.bcebos.com/fastdeploy/models/rknn2/new_glint360k_cosface_r18.rknn) | - | True/True | 600/3 |
## RKNPU2 backend inference tutorial
The SCRFD model is used below to show how to run inference with the RKNPU2 backend. The changes called out in the comments are relative to the ONNX CPU version.
```c++
int infer_scrfd_npu() {
char model_path[] = "./model/scrfd_2.5g_bnkps_shape640x640.rknn";
char image_file[] = "./image/test_lite_face_detector_3.jpg";
auto option = fastdeploy::RuntimeOption();
// Change 1: the option must call UseRKNPU2
option.UseRKNPU2();
// Change 2: pass fastdeploy::ModelFormat::RKNN when loading the model
auto *model = new fastdeploy::vision::facedet::SCRFD(model_path,"",option,fastdeploy::ModelFormat::RKNN);
if (!model->Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return 0;
}
// Change 3 (optional): RKNPU2 can run normalize on the NPU and expects NHWC input.
// DisableNormalizeAndPermute skips the normalize and HWC->CHW steps in preprocessing.
// If you are using a model from the supported list, call it before Predict.
model->DisableNormalizeAndPermute();
auto im = cv::imread(image_file);
auto im_bak = im.clone();
fastdeploy::vision::FaceDetectionResult res;
clock_t start = clock();
if (!model->Predict(&im, &res, 0.8, 0.8)) {
std::cerr << "Failed to predict." << std::endl;
return 0;
}
clock_t end = clock();
double dur = (double) (end - start);
printf("infer_scrfd_npu use time:%f\n", (dur / CLOCKS_PER_SEC));
auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res);
cv::imwrite("scrfd_rknn_vis_result.jpg", vis_im);
std::cout << "Visualized result saved in ./scrfd_rknn_vis_result.jpg" << std::endl;
return 0;
}
```
## Related documentation
- [RKNPU2 board environment setup](../../build_and_install/rknpu2.md)
- [rknn_toolkit2 installation guide](./install_rknn_toolkit2.md)
- [ONNX to RKNN conversion guide](./export.md)


@@ -0,0 +1,52 @@
# PaddleSeg Model Deployment
## Model versions
- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop)
FastDeploy currently supports deploying the following models:
- [U-Net series](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/unet/README.md)
- [PP-LiteSeg series](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/pp_liteseg/README.md)
- [PP-HumanSeg series](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/contrib/PP-HumanSeg/README.md)
- [FCN series](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/fcn/README.md)
- [DeepLabV3 series](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/deeplabv3/README.md)
[Note] If you are deploying **PP-Matting**, **PP-HumanMatting** or **ModNet**, please refer to [Matting model deployment](../../matting)
## Preparing and converting a PaddleSeg deployment model
Before deploying on RKNPU, the model has to be converted to an RKNN model. The process can generally be simplified to the following steps:
* Paddle dynamic-graph model -> ONNX model -> RKNN model.
* For converting a Paddle dynamic-graph model to an ONNX model, see the [PaddleSeg model export notes](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/contrib/PP-HumanSeg).
* For converting an ONNX model to an RKNN model, follow the [conversion guide](../../../../../docs/cn/faq/rknpu2/export.md).
Taking PP-HumanSeg as an example, once the ONNX model is available, the steps to convert it for RK3588 are:
* Write a config.yaml file
```yaml
model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
output_folder: ./
target_platform: RK3588
normalize:
  mean: [0.5,0.5,0.5]
  std: [0.5,0.5,0.5]
outputs: None
```
* Run the conversion script
```bash
python /path/to/FastDeploy/tools/export.py --config_path=/path/to/fastdeploy/tools/rknpu2/config/ppset_config.yaml
```
## Download pre-trained models
For developers' convenience, some models exported from PaddleSeg are provided below (exported with `--input_shape` specified, `--output_op none`, and `--without_argmax`); they can be downloaded and used directly.
| Task | Model | Model version (tested) | Size | ONNX/RKNN supported | ONNX/RKNN latency (ms) |
|------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-----|---------------|-----------------|
| Segmentation | PP-LiteSeg | [PP_LiteSeg_T_STDC1_cityscapes](https://bj.bcebos.com/fastdeploy/models/rknn2/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_3588.tgz) | - | True/True | 6634/5598 |
| Segmentation | PP-HumanSegV2Lite | [portrait](https://bj.bcebos.com/fastdeploy/models/rknn2/portrait_pp_humansegv2_lite_256x144_inference_model_without_softmax_3588.tgz) | - | True/True | 456/266 |
| Segmentation | PP-HumanSegV2Lite | [human](https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz) | - | True/True | 496/256 |
## Detailed deployment docs
- [RKNN deployment overview](../../../../../docs/cn/faq/rknpu2.md)
- [C++ deployment](cpp)
- [Python deployment](python)


@@ -0,0 +1,36 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
project(rknpu_test)
set(CMAKE_CXX_STANDARD 14)
# Path of the downloaded and extracted FastDeploy library
set(FASTDEPLOY_INSTALL_DIR "thirdpartys/fastdeploy-0.0.3")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake)
include_directories(${FastDeploy_INCLUDE_DIRS})
add_executable(rknpu_test infer.cc)
target_link_libraries(rknpu_test
${FastDeploy_LIBS}
)
set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
install(TARGETS rknpu_test DESTINATION ./)
install(DIRECTORY model DESTINATION ./)
install(DIRECTORY images DESTINATION ./)
file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*)
message("${FASTDEPLOY_LIBS}")
install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib)
file(GLOB ONNXRUNTIME_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/onnxruntime/lib/*)
install(PROGRAMS ${ONNXRUNTIME_LIBS} DESTINATION lib)
install(DIRECTORY ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib DESTINATION ./)
file(GLOB PADDLETOONNX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddle2onnx/lib/*)
install(PROGRAMS ${PADDLETOONNX_LIBS} DESTINATION lib)
file(GLOB RKNPU2_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/*)
install(PROGRAMS ${RKNPU2_LIBS} DESTINATION lib)


@@ -0,0 +1,84 @@
# PaddleSeg C++ Deployment Example
This directory shows how to deploy PaddleSeg models on RKNPU2; PP-HumanSeg is used as the example throughout.
Before deploying, confirm the following two points:
1. The hardware and software environment meets the requirements.
2. A pre-built deployment library has been downloaded for your environment, or FastDeploy has been built from source.
Both are covered in [Building the RKNPU2 deployment library](../../../../../../docs/cn/build_and_install/rknpu2.md).
## Create the basic directory layout
The example consists of the following parts:
```text
.
├── CMakeLists.txt
├── build # build directory
├── image # directory for test images
├── infer_cpu_npu.cc
├── infer_cpu_npu.h
├── main.cc
├── model # directory for model files
└── thirdpartys # directory for the SDK
```
First create the directory structure:
```bash
mkdir build
mkdir images
mkdir model
mkdir thirdpartys
```
## Build
### Build the SDK and copy it into the thirdpartys directory
Follow [Building the RKNPU2 deployment library](../../../../../../docs/cn/build_and_install/rknpu2.md) to build the SDK. After the build, a
fastdeploy-0.0.3 directory is generated under build; move it into the thirdpartys directory.
### Copy the model and config files into the model directory
During the Paddle dynamic-graph model -> Paddle static-graph model -> ONNX model conversion, an ONNX file and a matching yaml config file are generated; put the config file into the model directory.
The RKNN model file also has to be copied into model. A converted file is provided below (it targets RK3588; for RK3568 you need to [convert the PPSeg RKNN model](../README.md) again):
```bash
cd model
wget https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
tar xvf human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
cp -r ./human_pp_humansegv2_lite_192x192_pretrained_3588 ./model
```
### Put test images into the image directory
```bash
wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip
unzip -qo images.zip
```
### Build the example
```bash
cd build
cmake ..
make -j8
make install
```
## Run the example
```bash
cd ./build/install
./rknpu_test
```
## Expected result
After running, human_pp_humansegv2_lite_npu_result.jpg is written to the install directory, as shown below:
![](https://user-images.githubusercontent.com/58363586/198875853-72821ad1-d4f7-41e3-b616-bef43027de3c.jpg)
## Notes
RKNPU expects model inputs in NHWC format, and image normalization is baked into the model when converting to RKNN. Therefore, when deploying with FastDeploy,
call `DisableNormalizeAndPermute` (C++) or `disable_normalize_and_permute` (Python) first to skip normalization and the layout conversion in the preprocessing stage.
- [Model description](../../)
- [Python deployment](../python)
- [Converting the PPSeg RKNN model](../README.md)


@@ -0,0 +1,84 @@
#include <iostream>
#include <string>
#include "fastdeploy/vision.h"
void InferHumanPPHumansegv2Lite(const std::string& device = "cpu");
int main() {
InferHumanPPHumansegv2Lite("npu");
return 0;
}
fastdeploy::RuntimeOption GetOption(const std::string& device) {
auto option = fastdeploy::RuntimeOption();
if (device == "npu") {
option.UseRKNPU2();
} else {
option.UseCpu();
}
return option;
}
fastdeploy::ModelFormat GetFormat(const std::string& device) {
auto format = fastdeploy::ModelFormat::ONNX;
if (device == "npu") {
format = fastdeploy::ModelFormat::RKNN;
} else {
format = fastdeploy::ModelFormat::ONNX;
}
return format;
}
std::string GetModelPath(std::string& model_path, const std::string& device) {
if (device == "npu") {
model_path += "rknn";
} else {
model_path += "onnx";
}
return model_path;
}
void InferHumanPPHumansegv2Lite(const std::string& device) {
std::string model_file =
"./model/human_pp_humansegv2_lite_192x192_pretrained_3588/"
"human_pp_humansegv2_lite_192x192_pretrained_3588.";
std::string params_file;
std::string config_file =
"./model/human_pp_humansegv2_lite_192x192_pretrained_3588/deploy.yaml";
fastdeploy::RuntimeOption option = GetOption(device);
fastdeploy::ModelFormat format = GetFormat(device);
model_file = GetModelPath(model_file, device);
auto model = fastdeploy::vision::segmentation::PaddleSegModel(
model_file, params_file, config_file, option, format);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto image_file =
"./images/portrait_heng.jpg";
auto im = cv::imread(image_file);
if (device == "npu") {
model.DisableNormalizeAndPermute();
}
fastdeploy::vision::SegmentationResult res;
clock_t start = clock();
if (!model.Predict(&im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
clock_t end = clock();
auto dur = (double)(end - start);
printf("infer_human_pp_humansegv2_lite_npu use time:%f\n",
(dur / CLOCKS_PER_SEC));
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisSegmentation(im, res);
cv::imwrite("human_pp_humansegv2_lite_npu_result.jpg", vis_im);
std::cout
<< "Visualized result saved in ./human_pp_humansegv2_lite_npu_result.jpg"
<< std::endl;
}


@@ -0,0 +1,44 @@
# PaddleSeg Python Deployment Example
Before deploying, confirm the following:
- 1. The hardware and software environment meets the requirements; see [FastDeploy environment requirements](../../../../../../docs/cn/build_and_install/rknpu2.md)
[Note] If you are deploying **PP-Matting**, **PP-HumanMatting** or **ModNet**, please refer to [Matting model deployment](../../../matting)
This directory provides `infer.py`, a quick example of deploying PP-HumanSeg on RKNPU. Run the following to try it:
```bash
# Download the example code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/segmentation/paddleseg/python
# Download the model
wget https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
tar xvf human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
# Download the test images
wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip
unzip images.zip
# Run inference
python3 infer.py --model_file ./human_pp_humansegv2_lite_192x192_pretrained_3588/human_pp_humansegv2_lite_192x192_pretrained_3588.rknn \
--config_file ./human_pp_humansegv2_lite_192x192_pretrained_3588/deploy.yaml \
--image images/portrait_heng.jpg
```
The visualized result looks like this:
<div align="center">
<img src="https://user-images.githubusercontent.com/16222477/191712880-91ae128d-247a-43e0-b1e3-cafae78431e0.jpg", width=512px, height=256px />
</div>
## Notes
RKNPU expects model inputs in NHWC format, and image normalization is baked into the model when converting to RKNN. Therefore, when deploying with FastDeploy,
call `DisableNormalizeAndPermute` (C++) or `disable_normalize_and_permute` (Python) first to skip normalization and the layout conversion in the preprocessing stage.
## Other documentation
- [PaddleSeg model description](..)
- [PaddleSeg C++ deployment](../cpp)
- [Prediction result description](../../../../../../docs/api/vision_results/)
- [Converting the PPSeg RKNN model](../README.md)


@@ -0,0 +1,44 @@
import fastdeploy as fd
import cv2
import os
def parse_arguments():
import argparse
import ast
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_file", required=True, help="Path of PaddleSeg model.")
parser.add_argument(
"--config_file", required=True, help="Path of PaddleSeg config.")
parser.add_argument(
"--image", type=str, required=True, help="Path of test image file.")
return parser.parse_args()
def build_option(args):
option = fd.RuntimeOption()
option.use_rknpu2()
return option
args = parse_arguments()
# Configure the runtime and load the model
runtime_option = build_option(args)
model_file = args.model_file
params_file = ""
config_file = args.config_file
model = fd.vision.segmentation.PaddleSegModel(
model_file, params_file, config_file, runtime_option=runtime_option,model_format=fd.ModelFormat.RKNN)
model.disable_normalize_and_permute()
# Predict the segmentation result for the image
im = cv2.imread(args.image)
result = model.predict(im.copy())
print(result)
# Visualize the result
vis_im = fd.vision.vis_segmentation(im, result, weight=0.5)
cv2.imwrite("vis_img.png", vis_im)


@@ -0,0 +1,425 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
namespace fastdeploy {
RKNPU2Backend::~RKNPU2Backend() {
if(input_attrs != nullptr){
free(input_attrs);
}
if(output_attrs != nullptr){
free(output_attrs);
}
}
/***************************************************************
* @name GetSDKAndDeviceVersion
* @brief get RKNN sdk and device version
* @param None
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::GetSDKAndDeviceVersion() {
int ret;
// get sdk and device version
ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &sdk_ver, sizeof(sdk_ver));
if (ret != RKNN_SUCC) {
printf("rknn_query fail! ret=%d\n", ret);
return false;
}
FDINFO << "rknn_api/rknnrt version: " << sdk_ver.api_version
<< ", driver version: " << sdk_ver.drv_version << std::endl;
return true;
}
/***************************************************************
* @name BuildOption
* @brief save option
* @param RKNPU2BackendOption
* @note None
***************************************************************/
void RKNPU2Backend::BuildOption(const RKNPU2BackendOption& option) {
this->option_ = option;
// save cpu_name
this->option_.cpu_name = option.cpu_name;
// save context
this->option_.core_mask = option.core_mask;
}
/***************************************************************
* @name InitFromRKNN
* @brief Initialize RKNN model
* @param model_file: Binary data for the RKNN model or the path of RKNN model.
* params_file: None
* option: config
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::InitFromRKNN(const std::string& model_file,
const RKNPU2BackendOption& option) {
// LoadModel
if (!this->LoadModel((char*)model_file.data())) {
FDERROR << "load model failed" << std::endl;
return false;
}
// GetSDKAndDeviceVersion
if (!this->GetSDKAndDeviceVersion()) {
FDERROR << "get SDK and device version failed" << std::endl;
return false;
}
// BuildOption
this->BuildOption(option);
// SetCoreMask if RK3588
if (this->option_.cpu_name == rknpu2::CpuName::RK3588) {
if (!this->SetCoreMask(option_.core_mask)) {
FDERROR << "set core mask failed" << std::endl;
return false;
}
}
// GetModelInputOutputInfos
if (!this->GetModelInputOutputInfos()) {
FDERROR << "get model input output infos failed" << std::endl;
return false;
}
return true;
}
/***************************************************************
* @name SetCoreMask
* @brief set NPU core for model
* @param core_mask: The specification of NPU core setting.
* @return bool
* @note Only support RK3588
***************************************************************/
bool RKNPU2Backend::SetCoreMask(rknpu2::CoreMask& core_mask) const {
int ret = rknn_set_core_mask(ctx, static_cast<rknn_core_mask>(core_mask));
if (ret != RKNN_SUCC) {
FDERROR << "rknn_set_core_mask fail! ret=" << ret << std::endl;
return false;
}
return true;
}
/***************************************************************
* @name LoadModel
* @brief read rknn model
* @param model: Binary data for the RKNN model or the path of RKNN model.
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::LoadModel(void* model) {
int ret = RKNN_SUCC;
ret = rknn_init(&ctx, model, 0, 0, nullptr);
if (ret != RKNN_SUCC) {
FDERROR << "rknn_init fail! ret=" << ret << std::endl;
return false;
}
return true;
}
/***************************************************************
* @name GetModelInputOutputInfos
* @brief Get the detailed input and output infos of Model
* @param None
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::GetModelInputOutputInfos() {
int ret = RKNN_SUCC;
// Get the number of model inputs and outputs
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret != RKNN_SUCC) {
return false;
}
// Get detailed input parameters
input_attrs = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_input);
memset(input_attrs, 0, io_num.n_input * sizeof(rknn_tensor_attr));
inputs_desc_.resize(io_num.n_input);
for (uint32_t i = 0; i < io_num.n_input; i++) {
input_attrs[i].index = i;
// query info
ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]),
sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC) {
printf("rknn_query error! ret=%d\n", ret);
return false;
}
std::string temp_name = input_attrs[i].name;
std::vector<int> temp_shape{};
temp_shape.resize(input_attrs[i].n_dims);
for (int j = 0; j < input_attrs[i].n_dims; j++) {
temp_shape[j] = (int)input_attrs[i].dims[j];
}
FDDataType temp_dtype =
fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
input_attrs[i].type);
TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
inputs_desc_[i] = temp_input_info;
}
// Get detailed output parameters
output_attrs =
(rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
memset(output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
outputs_desc_.resize(io_num.n_output);
for (uint32_t i = 0; i < io_num.n_output; i++) {
output_attrs[i].index = i;
// query info
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]),
sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC) {
FDERROR << "rknn_query fail! ret = " << ret << std::endl;
return false;
}
std::string temp_name = output_attrs[i].name;
std::vector<int> temp_shape{};
temp_shape.resize(output_attrs[i].n_dims);
for (int j = 0; j < output_attrs[i].n_dims; j++) {
temp_shape[j] = (int)output_attrs[i].dims[j];
}
FDDataType temp_dtype =
fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
output_attrs[i].type);
TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
outputs_desc_[i] = temp_input_info;
}
return true;
}
/***************************************************************
* @name DumpTensorAttr
* @brief Get the model's detailed inputs and outputs
* @param rknn_tensor_attr
* @return None
* @note None
***************************************************************/
void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
"n_elems=%d, size=%d, fmt=%s, type=%s, "
"qnt_type=%s, zp=%d, scale=%f\n",
attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
attr.dims[2], attr.dims[3], attr.n_elems, attr.size,
get_format_string(attr.fmt), get_type_string(attr.type),
get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale);
}
TensorInfo RKNPU2Backend::GetInputInfo(int index) {
FDASSERT(index < NumInputs(),
"The index: %d should less than the number of inputs: %d.", index,
NumInputs())
return inputs_desc_[index];
}
std::vector<TensorInfo> RKNPU2Backend::GetInputInfos() { return inputs_desc_; }
TensorInfo RKNPU2Backend::GetOutputInfo(int index) {
FDASSERT(index < NumOutputs(),
"The index: %d should less than the number of outputs %d.", index,
NumOutputs())
return outputs_desc_[index];
}
std::vector<TensorInfo> RKNPU2Backend::GetOutputInfos() {
return outputs_desc_;
}
bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
int ret = RKNN_SUCC;
// Judge whether the input and output size are the same
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[RKNPU2Backend] Size of the inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
// the input size only can be one
if (inputs.size() > 1) {
FDERROR << "[RKNPU2Backend] Size of the inputs only support 1."
<< std::endl;
return false;
}
// Judge whether the input and output types are the same
rknn_tensor_type input_type =
fastdeploy::RKNPU2Backend::FDDataTypeToRknnTensorType(inputs[0].dtype);
if (input_type != input_attrs[0].type) {
FDWARNING << "The input tensor type != model's inputs type."
<< "The input_type need " << get_type_string(input_attrs[0].type)
<< ",but inputs[0].type is " << get_type_string(input_type)
<< std::endl;
}
rknn_tensor_format input_layout =
RKNN_TENSOR_NHWC; // RK3588 only support NHWC
input_attrs[0].type = input_type;
input_attrs[0].fmt = input_layout;
input_attrs[0].size = inputs[0].Nbytes();
input_attrs[0].size_with_stride = inputs[0].Nbytes();
input_attrs[0].pass_through = 0;
// create input tensor memory
rknn_tensor_mem* input_mems[1];
input_mems[0] = rknn_create_mem(ctx, inputs[0].Nbytes());
if (input_mems[0] == nullptr) {
FDERROR << "rknn_create_mem input_mems error." << std::endl;
return false;
}
// Copy input data to input tensor memory
uint32_t width = input_attrs[0].dims[2];
uint32_t stride = input_attrs[0].w_stride;
if (width == stride) {
if (inputs[0].Data() == nullptr) {
FDERROR << "inputs[0].Data is NULL." << std::endl;
return false;
}
memcpy(input_mems[0]->virt_addr, inputs[0].Data(), inputs[0].Nbytes());
} else {
FDERROR << "[RKNPU2Backend] only support width == stride." << std::endl;
return false;
}
// Create output tensor memory
rknn_tensor_mem* output_mems[io_num.n_output];
for (uint32_t i = 0; i < io_num.n_output; ++i) {
// Most post-processing does not support the fp16 format.
// The unified output here is float32
uint32_t output_size = output_attrs[i].n_elems * sizeof(float);
output_mems[i] = rknn_create_mem(ctx, output_size);
}
// Set input tensor memory
ret = rknn_set_io_mem(ctx, input_mems[0], &input_attrs[0]);
if (ret != RKNN_SUCC) {
FDERROR << "input tensor memory rknn_set_io_mem fail! ret=" << ret
<< std::endl;
return false;
}
// Set output tensor memory
for (uint32_t i = 0; i < io_num.n_output; ++i) {
// The default output type depends on the model; force float32 so post-processing can consume it
output_attrs[i].type = RKNN_TENSOR_FLOAT32;
ret = rknn_set_io_mem(ctx, output_mems[i], &output_attrs[i]);
// set output memory and attribute
if (ret != RKNN_SUCC) {
FDERROR << "output tensor memory rknn_set_io_mem fail! ret=" << ret
<< std::endl;
return false;
}
}
// run rknn
ret = rknn_run(ctx, nullptr);
if (ret != RKNN_SUCC) {
FDERROR << "rknn run error! ret=" << ret << std::endl;
return false;
}
rknn_destroy_mem(ctx, input_mems[0]);
// get result
outputs->resize(outputs_desc_.size());
std::vector<int64_t> temp_shape(4);
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
temp_shape.resize(outputs_desc_[i].shape.size());
for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) {
temp_shape[j] = outputs_desc_[i].shape[j];
}
(*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
outputs_desc_[i].name);
memcpy((*outputs)[i].MutableData(), (float*)output_mems[i]->virt_addr, (*outputs)[i].Nbytes());
rknn_destroy_mem(ctx, output_mems[i]);
}
return true;
}
/***************************************************************
* @name RknnTensorTypeToFDDataType
* @brief Change RknnTensorType To FDDataType
* @param rknn_tensor_type
* @return None
* @note Most post-processing does not support the fp16 format.
* Therefore, if the input is FP16, the output will be FP32.
***************************************************************/
FDDataType RKNPU2Backend::RknnTensorTypeToFDDataType(rknn_tensor_type type) {
if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT16) {
return FDDataType::FP32;
}
if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT32) {
return FDDataType::FP32;
}
if (type == rknn_tensor_type::RKNN_TENSOR_INT8) {
return FDDataType::INT8;
}
if (type == rknn_tensor_type::RKNN_TENSOR_INT16) {
return FDDataType::INT16;
}
if (type == rknn_tensor_type::RKNN_TENSOR_INT32) {
return FDDataType::INT32;
}
if (type == rknn_tensor_type::RKNN_TENSOR_UINT8) {
return FDDataType::UINT8;
}
if (type == rknn_tensor_type::RKNN_TENSOR_BOOL) {
return FDDataType::BOOL;
}
FDERROR << "FDDataType doesn't support this type" << std::endl;
return FDDataType::UNKNOWN1;
}
/***************************************************************
* @name FDDataTypeToRknnTensorType
* @brief Change FDDataType To RknnTensorType
* @param FDDataType
* @return None
* @note None
***************************************************************/
rknn_tensor_type
RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) {
if (type == FDDataType::FP16) {
return rknn_tensor_type::RKNN_TENSOR_FLOAT16;
}
if (type == FDDataType::FP32) {
return rknn_tensor_type::RKNN_TENSOR_FLOAT32;
}
if (type == FDDataType::INT8) {
return rknn_tensor_type::RKNN_TENSOR_INT8;
}
if (type == FDDataType::INT16) {
return rknn_tensor_type::RKNN_TENSOR_INT16;
}
if (type == FDDataType::INT32) {
return rknn_tensor_type::RKNN_TENSOR_INT32;
}
if (type == FDDataType::UINT8) {
return rknn_tensor_type::RKNN_TENSOR_UINT8;
}
if (type == FDDataType::BOOL) {
return rknn_tensor_type::RKNN_TENSOR_BOOL;
}
FDERROR << "rknn_tensor_type doesn't support this type" << std::endl;
return RKNN_TENSOR_TYPE_MAX;
}
} // namespace fastdeploy


@@ -0,0 +1,96 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "rknn_api.h" // NOLINT
#include "rknpu2_config.h"
#include <cstring> // for memset
#include <iostream>
#include <memory>
#include <string>
#include <vector>
namespace fastdeploy {
struct RKNPU2BackendOption {
rknpu2::CpuName cpu_name = rknpu2::CpuName::RK3588;
// The specification of the NPU core setting. It has the following choices:
// RKNN_NPU_CORE_AUTO : Referring to automatic mode, meaning that it will
// select the idle core inside the NPU.
// RKNN_NPU_CORE_0 : Running on the NPU0 core.
// RKNN_NPU_CORE_1 : Running on the NPU1 core.
// RKNN_NPU_CORE_2 : Running on the NPU2 core.
// RKNN_NPU_CORE_0_1 : Running on both the NPU0 and NPU1 cores simultaneously.
// RKNN_NPU_CORE_0_1_2 : Running on the NPU0, NPU1 and NPU2 cores simultaneously.
rknpu2::CoreMask core_mask = rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
};
class RKNPU2Backend : public BaseBackend {
public:
RKNPU2Backend() = default;
virtual ~RKNPU2Backend();
// RKNN API
bool LoadModel(void* model);
bool GetSDKAndDeviceVersion();
bool SetCoreMask(rknpu2::CoreMask& core_mask) const;
bool GetModelInputOutputInfos();
// BaseBackend API
void BuildOption(const RKNPU2BackendOption& option);
bool InitFromRKNN(const std::string& model_file,
const RKNPU2BackendOption& option = RKNPU2BackendOption());
int NumInputs() const override {
return static_cast<int>(inputs_desc_.size());
}
int NumOutputs() const override {
return static_cast<int>(outputs_desc_.size());
}
TensorInfo GetInputInfo(int index) override;
TensorInfo GetOutputInfo(int index) override;
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;
bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) override;
private:
// The object of rknn context.
rknn_context ctx{};
// The structure rknn_sdk_version is used to indicate the version information of the RKNN SDK.
rknn_sdk_version sdk_ver{};
// The structure rknn_input_output_num represents the number of input and output Tensor
rknn_input_output_num io_num{};
std::vector<TensorInfo> inputs_desc_;
std::vector<TensorInfo> outputs_desc_;
rknn_tensor_attr* input_attrs = nullptr;
rknn_tensor_attr* output_attrs = nullptr;
RKNPU2BackendOption option_;
static void DumpTensorAttr(rknn_tensor_attr& attr);
static FDDataType RknnTensorTypeToFDDataType(rknn_tensor_type type);
static rknn_tensor_type FDDataTypeToRknnTensorType(FDDataType type);
};
} // namespace fastdeploy


@@ -0,0 +1,40 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef RKNPU2_CONFIG_H
#define RKNPU2_CONFIG_H
namespace fastdeploy {
namespace rknpu2 {
typedef enum _rknpu2_cpu_name {
RK356X = 0, /* run on RK356X. */
RK3588 = 1, /* default,run on RK3588. */
UNDEFINED,
} CpuName;
/*! RKNPU2 core mask for mobile device. */
typedef enum _rknpu2_core_mask {
RKNN_NPU_CORE_AUTO = 0, ///< default, run on NPU core randomly.
RKNN_NPU_CORE_0 = 1, ///< run on NPU core 0.
RKNN_NPU_CORE_1 = 2, ///< run on NPU core 1.
RKNN_NPU_CORE_2 = 4, ///< run on NPU core 2.
RKNN_NPU_CORE_0_1 =
RKNN_NPU_CORE_0 | RKNN_NPU_CORE_1, ///< run on NPU core 0 and core 1.
RKNN_NPU_CORE_0_1_2 =
RKNN_NPU_CORE_0_1 | RKNN_NPU_CORE_2, ///< run on NPU core 0, core 1 and core 2.
RKNN_NPU_CORE_UNDEFINED,
} CoreMask;
} // namespace rknpu2
} // namespace fastdeploy
#endif //RKNPU2_CONFIG_H


@@ -53,6 +53,9 @@ std::string Str(const Device& d) {
case Device::GPU:
out = "Device::GPU";
break;
case Device::RKNPU:
out = "Device::RKNPU";
break;
case Device::IPU:
out = "Device::IPU";
break;
@@ -70,6 +73,9 @@ std::ostream& operator<<(std::ostream& out,const Device& d){
case Device::GPU:
out << "Device::GPU";
break;
case Device::RKNPU:
out << "Device::RKNPU";
break;
default:
out << "Device::UNKOWN";
}


@@ -22,7 +22,7 @@
namespace fastdeploy {
enum FASTDEPLOY_DECL Device { CPU, GPU, IPU };
enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU};
FASTDEPLOY_DECL std::string Str(const Device& d);


@@ -41,6 +41,7 @@ bool FastDeployModel::InitRuntime() {
#ifndef WITH_IPU
use_ipu = false;
#endif
bool use_rknpu = (runtime_option.device == Device::RKNPU);
// whether the model is supported by the setted backend
bool is_supported = false;
@@ -51,6 +52,13 @@ bool FastDeployModel::InitRuntime() {
break;
}
}
} else if (use_rknpu) {
for (auto& item : valid_rknpu_backends) {
if (item == runtime_option.backend) {
is_supported = true;
break;
}
}
} else if(use_ipu) {
for (auto& item : valid_ipu_backends) {
if (item == runtime_option.backend) {
@@ -101,6 +109,8 @@ bool FastDeployModel::InitRuntime() {
<< std::endl;
return false;
#endif
} else if (runtime_option.device == Device::RKNPU) {
return CreateRKNPUBackend();
} else if (runtime_option.device == Device::IPU) {
#ifdef WITH_IPU
return CreateIpuBackend();
@@ -110,7 +120,7 @@ bool FastDeployModel::InitRuntime() {
return false;
#endif
}
FDERROR << "Only support CPU/GPU now." << std::endl;
FDERROR << "Only support CPU/GPU/NPU now." << std::endl;
return false;
}
@@ -138,7 +148,7 @@ bool FastDeployModel::CreateCpuBackend() {
}
bool FastDeployModel::CreateGpuBackend() {
if (valid_gpu_backends.size() == 0) {
if (valid_gpu_backends.empty()) {
FDERROR << "There's no valid gpu backends for model: " << ModelName()
<< std::endl;
return false;
@@ -161,6 +171,30 @@ bool FastDeployModel::CreateGpuBackend() {
return false;
}
bool FastDeployModel::CreateRKNPUBackend() {
if (valid_rknpu_backends.empty()) {
FDERROR << "There's no valid npu backends for model: " << ModelName()
<< std::endl;
return false;
}
for (size_t i = 0; i < valid_rknpu_backends.size(); ++i) {
if (!IsBackendAvailable(valid_rknpu_backends[i])) {
continue;
}
runtime_option.backend = valid_rknpu_backends[i];
runtime_ = std::unique_ptr<Runtime>(new Runtime());
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Cannot find an available npu backend to load this model."
<< std::endl;
return false;
}
bool FastDeployModel::CreateIpuBackend() {
if (valid_ipu_backends.size() == 0) {
FDERROR << "There's no valid ipu backends for model: " << ModelName()


@@ -38,6 +38,12 @@ class FASTDEPLOY_DECL FastDeployModel {
/** Model's valid ipu backends. This member defined all the ipu backends have successfully tested for the model
*/
std::vector<Backend> valid_ipu_backends = {Backend::PDINFER};
/** Model's valid RKNPU backends. This member defines all the RKNPU backends that have been successfully tested for the model
*/
std::vector<Backend> valid_rknpu_backends = {};
/// Get number of inputs for this model
virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
/// Get number of outputs for this model
@@ -99,6 +105,8 @@ class FASTDEPLOY_DECL FastDeployModel {
virtual bool CreateCpuBackend();
virtual bool CreateGpuBackend();
virtual bool CreateIpuBackend();
virtual bool CreateRKNPUBackend();
bool initialized = false;
std::vector<Backend> valid_external_backends;


@@ -164,6 +164,9 @@ PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
m.def_submodule("text", "Text module of FastDeploy.");
BindText(text_module);
#endif
auto rknpu2_module =
m.def_submodule("rknpu2", "RKNPU2 config module of FastDeploy.");
BindRKNPU2Config(rknpu2_module);
}
} // namespace fastdeploy


@@ -39,6 +39,7 @@ void BindBackend(pybind11::module&);
void BindVision(pybind11::module&);
void BindText(pybind11::module& m);
void BindPipeline(pybind11::module& m);
void BindRKNPU2Config(pybind11::module&);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);


@@ -0,0 +1,33 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h"
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindRKNPU2Config(pybind11::module& m) {
pybind11::enum_<fastdeploy::rknpu2::CpuName>(m, "CpuName", pybind11::arithmetic(),
"CpuName for inference.")
.value("RK356X", fastdeploy::rknpu2::CpuName::RK356X)
.value("RK3588", fastdeploy::rknpu2::CpuName::RK3588)
.value("UNDEFINED", fastdeploy::rknpu2::CpuName::UNDEFINED);
pybind11::enum_<fastdeploy::rknpu2::CoreMask>(m, "CoreMask", pybind11::arithmetic(),
"CoreMask for inference.")
.value("RKNN_NPU_CORE_AUTO", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO)
.value("RKNN_NPU_CORE_0", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0)
.value("RKNN_NPU_CORE_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_1)
.value("RKNN_NPU_CORE_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_2)
.value("RKNN_NPU_CORE_0_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1)
.value("RKNN_NPU_CORE_0_1_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2)
.value("RKNN_NPU_CORE_UNDEFINED", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED);
}
} // namespace fastdeploy


@@ -22,6 +22,7 @@ void BindRuntime(pybind11::module& m) {
.def("set_model_path", &RuntimeOption::SetModelPath)
.def("use_gpu", &RuntimeOption::UseGpu)
.def("use_cpu", &RuntimeOption::UseCpu)
.def("use_rknpu2", &RuntimeOption::UseRKNPU2)
.def("set_external_stream", &RuntimeOption::SetExternalStream)
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
@@ -174,17 +175,20 @@ void BindRuntime(pybind11::module& m) {
.value("TRT", Backend::TRT)
.value("POROS", Backend::POROS)
.value("PDINFER", Backend::PDINFER)
.value("RKNPU2", Backend::RKNPU2)
.value("LITE", Backend::LITE);
pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(),
"ModelFormat for inference.")
.value("PADDLE", ModelFormat::PADDLE)
.value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT)
.value("RKNN", ModelFormat::RKNN)
.value("ONNX", ModelFormat::ONNX);
pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
"Device for inference.")
.value("CPU", Device::CPU)
.value("GPU", Device::GPU)
.value("IPU", Device::IPU);
.value("IPU", Device::IPU)
.value("RKNPU", Device::RKNPU);
pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(),
"Data type of FastDeploy.")


@@ -41,6 +41,10 @@
#include "fastdeploy/backends/lite/lite_backend.h"
#endif
#ifdef ENABLE_RKNPU2_BACKEND
#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
#endif
namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() {
@@ -62,6 +66,9 @@ std::vector<Backend> GetAvailableBackends() {
#endif
#ifdef ENABLE_LITE_BACKEND
backends.push_back(Backend::LITE);
#endif
#ifdef ENABLE_RKNPU2_BACKEND
backends.push_back(Backend::RKNPU2);
#endif
return backends;
}
@@ -85,6 +92,8 @@ std::string Str(const Backend& b) {
return "Backend::PDINFER";
} else if (b == Backend::POROS) {
return "Backend::POROS";
} else if (b == Backend::RKNPU2) {
return "Backend::RKNPU2";
}else if (b == Backend::OPENVINO) {
return "Backend::OPENVINO";
} else if (b == Backend::LITE) {
@@ -98,6 +107,8 @@ std::string Str(const ModelFormat& f) {
return "ModelFormat::PADDLE";
} else if (f == ModelFormat::ONNX) {
return "ModelFormat::ONNX";
}else if (f == ModelFormat::RKNN) {
return "ModelFormat::RKNN";
} else if (f == ModelFormat::TORCHSCRIPT) {
return "ModelFormat::TORCHSCRIPT";
}
@@ -113,6 +124,8 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) {
out << "Backend::PDINFER";
} else if (backend == Backend::OPENVINO) {
out << "Backend::OPENVINO";
} else if (backend == Backend::RKNPU2) {
out << "Backend::RKNPU2";
}else if (backend == Backend::POROS) {
out << "Backend::POROS";
} else if (backend == Backend::LITE) {
@@ -127,6 +140,8 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
out << "ModelFormat::PADDLE";
} else if (format == ModelFormat::ONNX) {
out << "ModelFormat::ONNX";
} else if (format == ModelFormat::RKNN) {
out << "ModelFormat::RKNN";
} else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT";
}
@@ -152,6 +167,14 @@ bool CheckModelFormat(const std::string& model_file,
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::RKNN) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".rknn") {
FDERROR << "With model format of ModelFormat::RKNN, the model file "
"should ends with `.rknn`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::TORCHSCRIPT) {
if (model_file.size() < 3 ||
model_file.substr(model_file.size() - 3, 3) != ".pt") {
@@ -162,7 +185,7 @@ bool CheckModelFormat(const std::string& model_file,
}
} else {
FDERROR << "Only support model format with frontend ModelFormat::PADDLE / "
"ModelFormat::ONNX / ModelFormat::TORCHSCRIPT."
"ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT."
<< std::endl;
return false;
}
@@ -182,6 +205,10 @@ ModelFormat GuessModelFormat(const std::string& model_file) {
model_file.substr(model_file.size() - 3, 3) == ".pt") {
FDINFO << "Model Format: Torchscript." << std::endl;
return ModelFormat::TORCHSCRIPT;
} else if (model_file.size() > 5 &&
model_file.substr(model_file.size() - 5, 5) == ".rknn") {
FDINFO << "Model Format: RKNN." << std::endl;
return ModelFormat::RKNN;
}
FDERROR << "Cannot guess which model format you are using, please set "
@@ -223,6 +250,13 @@ void RuntimeOption::UseGpu(int gpu_id) {
void RuntimeOption::UseCpu() { device = Device::CPU; }
void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
fastdeploy::rknpu2::CoreMask rknpu2_core) {
rknpu2_cpu_name_ = rknpu2_name;
rknpu2_core_mask_ = rknpu2_core;
device = Device::RKNPU;
}
void RuntimeOption::SetExternalStream(void* external_stream) {
external_stream_ = external_stream;
}
@@ -234,7 +268,8 @@ void RuntimeOption::SetCpuThreadNum(int thread_num) {
void RuntimeOption::SetOrtGraphOptLevel(int level) {
std::vector<int> supported_level{-1, 0, 1, 2};
auto valid_level = std::find(supported_level.begin(), supported_level.end(), level) != supported_level.end();
auto valid_level = std::find(supported_level.begin(), supported_level.end(),
level) != supported_level.end();
FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
ort_graph_opt_level = level;
}
@@ -324,7 +359,6 @@ void RuntimeOption::EnableLiteFP16() {
void RuntimeOption::DisableLiteFP16() {
lite_enable_fp16 = false;
}
void RuntimeOption::EnableLiteInt8() {
lite_enable_int8 = true;
}
@@ -332,7 +366,6 @@ void RuntimeOption::EnableLiteInt8() {
void RuntimeOption::DisableLiteInt8() {
lite_enable_int8 = false;
}
void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
lite_power_mode = mode;
}
@@ -454,6 +487,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
option.backend = Backend::POROS;
} else if (IsBackendAvailable(Backend::OPENVINO)) {
option.backend = Backend::OPENVINO;
} else if (IsBackendAvailable(Backend::RKNPU2)) {
option.backend = Backend::RKNPU2;
} else {
FDERROR << "Please define backend in RuntimeOption, current it's "
"Backend::UNKNOWN."
@@ -506,6 +541,13 @@ bool Runtime::Init(const RuntimeOption& _option) {
CreateLiteBackend();
FDINFO << "Runtime initialized with Backend::LITE in " << Str(option.device)
<< "." << std::endl;
} else if (option.backend == Backend::RKNPU2) {
FDASSERT(option.device == Device::RKNPU,
"Backend::RKNPU2 only supports Device::RKNPU2");
CreateRKNPU2Backend();
FDINFO << "Runtime initialized with Backend::RKNPU2 in "
<< Str(option.device) << "." << std::endl;
} else {
FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
@@ -720,4 +762,21 @@ void Runtime::CreateLiteBackend() {
#endif
}
void Runtime::CreateRKNPU2Backend() {
#ifdef ENABLE_RKNPU2_BACKEND
auto rknpu2_option = RKNPU2BackendOption();
rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
rknpu2_option.core_mask = option.rknpu2_core_mask_;
FDASSERT(option.model_format == ModelFormat::RKNN,
"RKNPU2Backend only support model format of ModelFormat::RKNN");
backend_ = utils::make_unique<RKNPU2Backend>();
auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
"Load model from nb file failed while initializing LiteBackend.");
#else
FDASSERT(false, "RKNPU2Backend is not available, please compiled with "
"ENABLE_RKNPU2_BACKEND=ON.");
#endif
}
} // namespace fastdeploy

View File

@@ -20,12 +20,13 @@
#pragma once
#include <algorithm>
#include <map>
#include <vector>
#include <algorithm>
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h"
#include "backends/rknpu/rknpu2/rknpu2_config.h"
/** \brief All C++ FastDeploy APIs are defined inside this namespace
*
@@ -41,6 +42,7 @@ enum Backend {
POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU
OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only
RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only
};
/*! Deep learning model format */
@@ -48,6 +50,7 @@ enum ModelFormat {
AUTOREC, ///< Auto recognize the model format by model file name
PADDLE, ///< Model with paddlepaddle format
ONNX, ///< Model with ONNX format
RKNN, ///< Model with RKNN format
TORCHSCRIPT, ///< Model with TorchScript format
};
@@ -102,6 +105,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
/// Use Nvidia GPU to inference
void UseGpu(int gpu_id = 0);
void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name = fastdeploy::rknpu2::CpuName::RK3588,
fastdeploy::rknpu2::CoreMask rknpu2_core = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
void SetExternalStream(void* external_stream);
/*
@@ -325,6 +331,10 @@ struct FASTDEPLOY_DECL RuntimeOption {
int unconst_ops_thres = -1;
std::string poros_file = "";
// ======Only for RKNPU2 Backend=======
fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ = fastdeploy::rknpu2::CpuName::RK3588;
fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty
ModelFormat model_format = ModelFormat::AUTOREC; // format of input model
@@ -388,6 +398,7 @@ struct FASTDEPLOY_DECL Runtime {
void CreateTrtBackend();
void CreateOpenVINOBackend();
void CreateLiteBackend();
void CreateRKNPU2Backend();
std::unique_ptr<BaseBackend> backend_;
};
} // namespace fastdeploy
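
A quick way to confirm the two new enum entries above from Python, assuming the pybind layer added in this commit exports them one-to-one with the C++ enums:

    import fastdeploy as fd

    # Both members are assumed to be registered by the new pybind code.
    print(fd.Backend.RKNPU2)     # backend entry for the Rockchip NPU runtime
    print(fd.ModelFormat.RKNN)   # model format entry for `.rknn` files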

View File

@@ -28,6 +28,7 @@ PaddleSegModel::PaddleSegModel(const std::string& model_file,
config_file_ = config_file;
valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_rknpu_backends = {Backend::RKNPU2};
runtime_option = custom_option;
runtime_option.model_format = model_format;
runtime_option.model_file = model_file;
@@ -67,6 +68,7 @@ bool PaddleSegModel::BuildPreprocessPipelineFromConfig() {
FDASSERT(op.IsMap(),
"Require the transform information in yaml be Map type.");
if (op["type"].as<std::string>() == "Normalize") {
      if (!(this->disable_normalize_and_permute)) {
std::vector<float> mean = {0.5, 0.5, 0.5};
std::vector<float> std = {0.5, 0.5, 0.5};
if (op["mean"]) {
@@ -76,7 +78,7 @@ bool PaddleSegModel::BuildPreprocessPipelineFromConfig() {
std = op["std"].as<std::vector<float>>();
}
processors_.push_back(std::make_shared<Normalize>(mean, std));
}
} else if (op["type"].as<std::string>() == "Resize") {
yml_contain_resize_op = true;
const auto& target_size = op["target_size"];
@@ -130,7 +132,9 @@ bool PaddleSegModel::BuildPreprocessPipelineFromConfig() {
<< "." << std::endl;
}
}
  if (!(this->disable_normalize_and_permute)) {
processors_.push_back(std::make_shared<HWC2CHW>());
}
return true;
}
@@ -357,6 +361,14 @@ bool PaddleSegModel::Predict(cv::Mat* im, SegmentationResult* result) {
return true;
}
void PaddleSegModel::DisableNormalizeAndPermute() {
  this->disable_normalize_and_permute = true;
  // Rebuild the preprocess pipeline so this setting still takes effect
  // when the configuration file has already been loaded.
if (!BuildPreprocessPipelineFromConfig()) {
FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl;
}
}
} // namespace segmentation
} // namespace vision
} // namespace fastdeploy

View File

@@ -60,6 +60,9 @@ class FASTDEPLOY_DECL PaddleSegModel : public FastDeployModel {
*/
bool is_vertical_screen = false;
  // This function disables Normalize and HWC2CHW in the preprocessing step.
void DisableNormalizeAndPermute();
private:
bool Initialize();
@@ -76,6 +79,9 @@ class FASTDEPLOY_DECL PaddleSegModel : public FastDeployModel {
std::vector<std::shared_ptr<Processor>> processors_;
std::string config_file_;
  // Records whether Normalize and HWC2CHW are disabled in preprocessing.
bool disable_normalize_and_permute = false;
};
} // namespace segmentation

View File

@@ -27,6 +27,7 @@ void BindPPSeg(pybind11::module& m) {
self.Predict(&mat, res);
return res;
})
.def("disable_normalize_and_permute",&vision::segmentation::PaddleSegModel::DisableNormalizeAndPermute)
.def_readwrite("apply_softmax",
&vision::segmentation::PaddleSegModel::apply_softmax)
.def_readwrite("is_vertical_screen",

View File

@@ -16,10 +16,11 @@ import logging
import os
import sys
from .c_lib_wrap import (ModelFormat, Backend, FDDataType, TensorInfo, Device,
from .c_lib_wrap import (ModelFormat, Backend, rknpu2,
FDDataType, TensorInfo, Device,
FDTensor, is_built_with_gpu, is_built_with_ort,
ModelFormat, is_built_with_paddle, is_built_with_trt,
get_default_cuda_directory)
get_default_cuda_directory, )
from .runtime import Runtime, RuntimeOption
from .model import FastDeployModel

View File

@@ -16,7 +16,7 @@ import logging
import numpy as np
from . import ModelFormat
from . import c_lib_wrap as C
from . import rknpu2
class Runtime:
"""FastDeploy Runtime object.
@@ -207,6 +207,11 @@ class RuntimeOption:
"""
return self._option.use_cpu()
    def use_rknpu2(self,
                   rknpu2_name=rknpu2.CpuName.RK3588,
                   rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0):
        """Inference with RKNPU2 (Rockchip NPU)
        """
        return self._option.use_rknpu2(rknpu2_name, rknpu2_core)
def set_cpu_thread_num(self, thread_num=-1):
"""Set number of threads if inference with CPU

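A minimal usage sketch for the new option, assuming the package is imported as `fastdeploy` and using the default enum values declared above:

    import fastdeploy as fd

    option = fd.RuntimeOption()
    # Run on the RK3588 NPU, pinned to core 0 (the defaults shown above).
    option.use_rknpu2(fd.rknpu2.CpuName.RK3588,
                      fd.rknpu2.CoreMask.RKNN_NPU_CORE_0)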
View File

@@ -35,7 +35,7 @@ class PaddleSegModel(FastDeployModel):
"""
super(PaddleSegModel, self).__init__(runtime_option)
assert model_format == ModelFormat.PADDLE, "PaddleSeg only support model format of ModelFormat.Paddle now."
# assert model_format == ModelFormat.PADDLE, "PaddleSeg only support model format of ModelFormat.Paddle now."
self._model = C.vision.segmentation.PaddleSegModel(
model_file, params_file, config_file, self._runtime_option,
model_format)
@@ -49,6 +49,9 @@ class PaddleSegModel(FastDeployModel):
"""
return self._model.predict(input_image)
def disable_normalize_and_permute(self):
return self._model.disable_normalize_and_permute()
@property
def apply_softmax(self):
"""Atrribute of PaddleSeg model. Stating Whether applying softmax operator in the postprocess, default value is False

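An end-to-end sketch for running PaddleSeg on RKNPU2 with the Python API above; the file names are placeholders, and the constructor keyword names are assumed to match the wrapper shown here:

    import cv2
    import fastdeploy as fd

    option = fd.RuntimeOption()
    option.use_rknpu2()

    model = fd.vision.segmentation.PaddleSegModel(
        "pp_humanseg_rk3588.rknn",   # placeholder model exported by tools/rknpu2/export.py
        "",                          # RKNN models carry no separate params file
        "deploy.yaml",               # placeholder PaddleSeg config
        runtime_option=option,
        model_format=fd.ModelFormat.RKNN)

    # Normalize/HWC2CHW are usually folded into the RKNN model at export time,
    # so the host-side preprocessing steps are switched off here.
    model.disable_normalize_and_permute()

    result = model.predict(cv2.imread("portrait.jpg"))
    print(result)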
View File

@@ -48,6 +48,8 @@ with open(os.path.join(TOP_DIR, "python", "requirements.txt")) as fin:
setup_configs = dict()
setup_configs["ENABLE_PADDLE_FRONTEND"] = os.getenv("ENABLE_PADDLE_FRONTEND",
"ON")
setup_configs["ENABLE_RKNPU2_BACKEND"] = os.getenv("ENABLE_RKNPU2_BACKEND",
"OFF")
setup_configs["ENABLE_ORT_BACKEND"] = os.getenv("ENABLE_ORT_BACKEND", "OFF")
setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND",
"OFF")
@@ -69,6 +71,7 @@ setup_configs["LIBRARY_NAME"] = PACKAGE_NAME
setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main"
setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "")
setup_configs["ORT_DIRECTORY"] = os.getenv("ORT_DIRECTORY", "")
setup_configs["RKNN2_TARGET_SOC"] = os.getenv("RKNN2_TARGET_SOC", "")
if setup_configs["WITH_GPU"] == "ON" or setup_configs[
"BUILD_ON_JETSON"] == "ON":

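The two new build switches are read from the environment by setup.py, so a wheel build targeting RK3588 would set something like the following (usually exported from the shell) before running the build; the SOC value is only an example:

    import os

    # Values consumed by the setup_configs entries added above.
    os.environ["ENABLE_RKNPU2_BACKEND"] = "ON"
    os.environ["RKNN2_TARGET_SOC"] = "RK3588"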
View File

@@ -0,0 +1,7 @@
model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
output_folder: ./
target_platform: RK3588
normalize:
mean: [0.5,0.5,0.5]
std: [0.5,0.5,0.5]
outputs: None

tools/rknpu2/export.py (new file)
View File

@@ -0,0 +1,75 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import argparse
from rknn.api import RKNN
def get_config():
parser = argparse.ArgumentParser()
parser.add_argument("--verbose", default=True, help="rknntoolkit verbose")
parser.add_argument("--config_path")
args = parser.parse_args()
return args
if __name__ == "__main__":
config = get_config()
with open(config.config_path) as file:
file_data = file.read()
yaml_config = yaml.safe_load(file_data)
print(yaml_config)
model = RKNN(config.verbose)
# Config
mean_values = [[255 * mean for mean in yaml_config["normalize"]["mean"]]]
std_values = [[255 * std for std in yaml_config["normalize"]["std"]]]
model.config(mean_values=mean_values,
std_values=std_values,
target_platform=yaml_config["target_platform"])
# Load ONNX model
print(type(yaml_config["outputs"]))
print("yaml_config[\"outputs\"] = ", yaml_config["outputs"])
if yaml_config["outputs"] == "None":
ret = model.load_onnx(model=yaml_config["model_path"])
else:
ret = model.load_onnx(model=yaml_config["model_path"],
outputs=yaml_config["outputs"])
assert ret == 0, "Load model failed!"
# Build model
ret = model.build(do_quantization=None)
assert ret == 0, "Build model failed!"
# Init Runtime
ret = model.init_runtime()
assert ret == 0, "Init runtime environment failed!"
# Export
if not os.path.exists(yaml_config["output_folder"]):
os.mkdir(yaml_config["output_folder"])
model_base_name = os.path.basename(yaml_config["model_path"]).split(".")[0]
model_device_name = yaml_config["target_platform"].lower()
model_save_name = model_base_name + "_" + model_device_name + ".rknn"
ret = model.export_rknn(
os.path.join(yaml_config["output_folder"], model_save_name))
assert ret == 0, "Export rknn model failed!"
print("Export OK!")