diff --git a/CMakeLists.txt b/CMakeLists.txt
index ded88c54e..ce6b1920d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -58,6 +58,7 @@ option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF)
option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF)
option(ENABLE_POROS_BACKEND "Whether to enable poros backend." OFF)
option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF)
+option(ENABLE_RKNPU2_BACKEND "Whether to enable RKNPU2 backend." OFF)
option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
option(ENABLE_VISION "Whether to enable vision models usage." OFF)
option(ENABLE_TEXT "Whether to enable text models usage." OFF)
@@ -164,13 +165,14 @@ file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fas
file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
+file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu/rknpu2/*.cc)
file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pipeline/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cu)
file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc)
file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc)
-list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${DEPLOY_PIPELINE_SRCS})
+list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS})
set(DEPEND_LIBS "")
@@ -227,6 +229,13 @@ if(ENABLE_OPENVINO_BACKEND)
include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake)
endif()
+if(ENABLE_RKNPU2_BACKEND)
+ add_definitions(-DENABLE_RKNPU2_BACKEND)
+ list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_RKNPU2_SRCS})
+ include(${PROJECT_SOURCE_DIR}/cmake/rknpu2.cmake)
+ list(APPEND DEPEND_LIBS ${RKNN_RT_LIB})
+endif()
+
if(ENABLE_POROS_BACKEND)
set(CMAKE_CXX_STANDARD 14)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
index e1a1cf2f4..1074c86ad 100755
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -2,6 +2,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.8)
set(WITH_GPU @WITH_GPU@)
set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
+set(ENABLE_RKNPU2_BACKEND @ENABLE_RKNPU2_BACKEND@)
set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@)
set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
@@ -27,6 +28,7 @@ set(LIBRARY_NAME @LIBRARY_NAME@)
set(OPENCV_DIRECTORY "@OPENCV_DIRECTORY@")
set(ORT_DIRECTORY "@ORT_DIRECTORY@")
set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
+set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
set(FASTDEPLOY_LIBS "")
set(FASTDEPLOY_INCS "")
@@ -88,6 +90,18 @@ if(ENABLE_OPENVINO_BACKEND)
list(APPEND FASTDEPLOY_LIBS ${OPENVINO_LIBS})
endif()
+if(ENABLE_RKNPU2_BACKEND)
+ if(RKNN2_TARGET_SOC STREQUAL "RK356X")
+ set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK356X/lib/librknn_api.so)
+ elseif (RKNN2_TARGET_SOC STREQUAL "RK3588")
+ set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/librknn_api.so)
+ else ()
+    message(FATAL_ERROR "RKNN2_TARGET_SOC is not set; valid values are RK356X or RK3588")
+ endif()
+ message(STATUS "The path of RKNPU2 is ${RKNPU2_LIB}.")
+ list(APPEND FASTDEPLOY_LIBS ${RKNPU2_LIB})
+endif()
+
if(ENABLE_LITE_BACKEND)
set(LITE_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${PADDLELITE_FILENAME})
if(ANDROID)
@@ -234,6 +248,7 @@ message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
message(STATUS " WITH_GPU : ${WITH_GPU}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
+message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
diff --git a/cmake/rknpu2.cmake b/cmake/rknpu2.cmake
new file mode 100644
index 000000000..e8ed424be
--- /dev/null
+++ b/cmake/rknpu2.cmake
@@ -0,0 +1,26 @@
+# get RKNPU2_URL
+set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
+set(RKNPU2_VERSION "1.4.0")
+set(RKNPU2_FILE "rknpu2_runtime-linux-x64-${RKNPU2_VERSION}.tgz")
+set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}")
+
+# download_and_decompress
+download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE} ${THIRD_PARTY_PATH}/install/)
+
+# set path
+set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)
+
+if (NOT CMAKE_SYSTEM_NAME STREQUAL "Linux")
+  message(FATAL_ERROR "[rknpu2.cmake] RKNPU2 can only be built on Linux")
+endif ()
+
+
+if (EXISTS ${RKNPU_RUNTIME_PATH})
+ set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so)
+ include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include)
+else ()
+ message(FATAL_ERROR "[rknpu2.cmake] download_and_decompress rknpu2_runtime error")
+endif ()
+
+
diff --git a/cmake/summary.cmake b/cmake/summary.cmake
index fb4c534c6..9a545441f 100755
--- a/cmake/summary.cmake
+++ b/cmake/summary.cmake
@@ -31,6 +31,7 @@ function(fastdeploy_summary)
message(STATUS " FastDeploy version : ${FASTDEPLOY_VERSION}")
message(STATUS " Paddle2ONNX version : ${PADDLE2ONNX_VERSION}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
+ message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
diff --git a/docs/cn/build_and_install/README.md b/docs/cn/build_and_install/README.md
index 1b4bc21bb..43b98602d 100644
--- a/docs/cn/build_and_install/README.md
+++ b/docs/cn/build_and_install/README.md
@@ -9,18 +9,20 @@
## FastDeploy Build Options
-| Option | Description |
-| :--- | :---- |
-| ENABLE_ORT_BACKEND | Default OFF. Whether to build with the ONNX Runtime backend (recommended ON for CPU/GPU) |
-| ENABLE_PADDLE_BACKEND | Default OFF. Whether to build with the Paddle Inference backend (recommended ON for CPU/GPU) |
-| ENABLE_LITE_BACKEND | Default OFF. Whether to build with the Paddle Lite backend (must be ON when building the Android library) |
-| ENABLE_TRT_BACKEND | Default OFF. Whether to build with the TensorRT backend (recommended ON for GPU) |
-| ENABLE_OPENVINO_BACKEND | Default OFF. Whether to build with the OpenVINO backend (recommended ON for CPU) |
-| ENABLE_VISION | Default OFF. Whether to build the vision model deployment module |
-| ENABLE_TEXT | Default OFF. Whether to build the text (NLP) model deployment module |
-| WITH_GPU | Default OFF. Must be set to ON when deploying on GPU |
-| CUDA_DIRECTORY | Default /usr/local/cuda. Path to CUDA (>=11.2) when deploying on GPU |
-| TRT_DIRECTORY | Path to TensorRT (>=8.4); must be set when the TensorRT backend is enabled |
-| ORT_DIRECTORY | Path to a local ONNX Runtime library when the ONNX Runtime backend is enabled; if not set, it is downloaded automatically during the build |
-| OPENCV_DIRECTORY | Path to a local OpenCV library when ENABLE_VISION=ON; if not set, it is downloaded automatically during the build |
-| OPENVINO_DIRECTORY | Path to a local OpenVINO library when the OpenVINO backend is enabled; if not set, it is downloaded automatically during the build |
+| Option | Description |
+|:------------------------|:--------------------------------------------------------------------------|
+| ENABLE_ORT_BACKEND | Default OFF. Whether to build with the ONNX Runtime backend (recommended ON for CPU/GPU) |
+| ENABLE_PADDLE_BACKEND | Default OFF. Whether to build with the Paddle Inference backend (recommended ON for CPU/GPU) |
+| ENABLE_LITE_BACKEND | Default OFF. Whether to build with the Paddle Lite backend (must be ON when building the Android library) |
+| ENABLE_RKNPU2_BACKEND | Default OFF. Whether to build with the RKNPU2 backend (recommended ON for RK3588/RK3568/RK3566) |
+| ENABLE_TRT_BACKEND | Default OFF. Whether to build with the TensorRT backend (recommended ON for GPU) |
+| ENABLE_OPENVINO_BACKEND | Default OFF. Whether to build with the OpenVINO backend (recommended ON for CPU) |
+| ENABLE_VISION | Default OFF. Whether to build the vision model deployment module |
+| ENABLE_TEXT | Default OFF. Whether to build the text (NLP) model deployment module |
+| WITH_GPU | Default OFF. Must be set to ON when deploying on GPU |
+| RKNN2_TARGET_SOC | Only used when ENABLE_RKNPU2_BACKEND=ON. No default value; must be set to RK3588 or RK356X, otherwise the build fails |
+| CUDA_DIRECTORY | Default /usr/local/cuda. Path to CUDA (>=11.2) when deploying on GPU |
+| TRT_DIRECTORY | Path to TensorRT (>=8.4); must be set when the TensorRT backend is enabled |
+| ORT_DIRECTORY | Path to a local ONNX Runtime library when the ONNX Runtime backend is enabled; if not set, it is downloaded automatically during the build |
+| OPENCV_DIRECTORY | Path to a local OpenCV library when ENABLE_VISION=ON; if not set, it is downloaded automatically during the build |
+| OPENVINO_DIRECTORY | Path to a local OpenVINO library when the OpenVINO backend is enabled; if not set, it is downloaded automatically during the build |
diff --git a/docs/cn/build_and_install/rknpu2.md b/docs/cn/build_and_install/rknpu2.md
new file mode 100644
index 000000000..61057252c
--- /dev/null
+++ b/docs/cn/build_and_install/rknpu2.md
@@ -0,0 +1,102 @@
+# Building the RKNPU2 (Rockchip 2nd-Gen NPU) Deployment Library
+
+## Preface
+FastDeploy provides preliminary support for RKNPU2 deployment. If you run into a bug while using it, please report it via an issue.
+
+## Introduction
+The backend engines currently supported by FastDeploy on Rockchip platforms are listed below:
+
+| Backend | Platform | Supported model format | Notes |
+|:------------------|:---------------------|:-------|:-------------------------------------------|
+| ONNX Runtime | RK356X / RK3588 | ONNX | Controlled by the build switch `ENABLE_ORT_BACKEND` (ON/OFF, default OFF) |
+| RKNPU2 | RK356X / RK3588 | RKNN | Controlled by the build switch `ENABLE_RKNPU2_BACKEND` (ON/OFF, default OFF) |
+
+
+## Building and Installing the C++ SDK
+
+RKNPU2 can only be built on Linux; all the steps below assume a Linux environment.
+
+### Update the driver and install the build-time dependencies
+
+Before running any code, the latest RKNPU driver must be installed (currently version 1.4.0). To simplify installation, a one-click install script is provided.
+
+**Method 1: install via the script**
+```bash
+# Download and extract rknpu2_device_install_1.4.0
+wget https://bj.bcebos.com/fastdeploy/third_libs/rknpu2_device_install_1.4.0.zip
+unzip rknpu2_device_install_1.4.0.zip
+
+cd rknpu2_device_install_1.4.0
+# For RK3588, run:
+sudo bash rknn_install_rk3588.sh
+# For RK356X, run:
+sudo bash rknn_install_rk356X.sh
+```
+
+**Method 2: install via Gitee**
+```bash
+# Install the required packages
+sudo apt update -y
+sudo apt install -y python3
+sudo apt install -y python3-dev
+sudo apt install -y python3-pip
+sudo apt install -y gcc
+sudo apt install -y python3-opencv
+sudo apt install -y python3-numpy
+sudo apt install -y cmake
+
+# Download rknpu2
+# For RK3588, run:
+git clone https://gitee.com/mirrors_rockchip-linux/rknpu2.git
+sudo cp ./rknpu2/runtime/RK3588/Linux/librknn_api/aarch64/* /usr/lib
+sudo cp ./rknpu2/runtime/RK3588/Linux/rknn_server/aarch64/usr/bin/* /usr/bin/
+
+# For RK356X, run:
+git clone https://gitee.com/mirrors_rockchip-linux/rknpu2.git
+sudo cp ./rknpu2/runtime/RK356X/Linux/librknn_api/aarch64/* /usr/lib
+sudo cp ./rknpu2/runtime/RK356X/Linux/rknn_server/aarch64/usr/bin/* /usr/bin/
+```
+
+### Build the C++ SDK
+
+```bash
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+mkdir build && cd build
+
+# See the README for the full list of build options; only the key ones are described here.
+# -DENABLE_ORT_BACKEND: whether to enable the ONNX Runtime backend, default OFF
+# -DENABLE_RKNPU2_BACKEND: whether to enable the RKNPU2 backend, default OFF
+# -DRKNN2_TARGET_SOC: the target board SoC; must be RK356X or RK3588 (case-sensitive)
+cmake .. -DENABLE_ORT_BACKEND=ON \
+ -DENABLE_RKNPU2_BACKEND=ON \
+ -DENABLE_VISION=ON \
+ -DRKNN2_TARGET_SOC=RK3588 \
+ -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3
+make -j8
+make install
+```
+
+### Build the Python SDK
+
+```bash
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+cd python
+
+export ENABLE_ORT_BACKEND=ON
+export ENABLE_RKNPU2_BACKEND=ON
+export ENABLE_VISION=ON
+export RKNN2_TARGET_SOC=RK3588
+python3 setup.py build
+python3 setup.py bdist_wheel
+
+cd dist
+
+pip3 install fastdeploy_python-0.0.0-cp39-cp39-linux_aarch64.whl
+```
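+
+As a quick sanity check that the installed wheel exposes the new backend option, the following minimal Python snippet can be used. It only relies on the `use_rknpu2` binding added in this change and does not need a model file:
+
+```python
+import fastdeploy as fd
+
+# use_rknpu2() is bound in fastdeploy/pybind/runtime.cc by this change;
+# it should succeed if the wheel was built with ENABLE_RKNPU2_BACKEND=ON.
+option = fd.RuntimeOption()
+option.use_rknpu2()
+print("RKNPU2 runtime option created successfully")
+```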
+
+## Deploying Models
+
+See the [RKNPU2 model deployment tutorial](../faq/rknpu2/rknpu2.md)
\ No newline at end of file
diff --git a/docs/cn/faq/rknpu2/export.md b/docs/cn/faq/rknpu2/export.md
new file mode 100644
index 000000000..9399c78d5
--- /dev/null
+++ b/docs/cn/faq/rknpu2/export.md
@@ -0,0 +1,48 @@
+# Model Export Guide
+
+## Introduction
+
+FastDeploy ships a simple integration of the ONNX -> RKNN conversion flow. This guide exports models with tools/export.py; a YAML configuration file has to be written before exporting.
+Before converting, follow the [rknn_toolkit2 installation guide](./install_rknn_toolkit2.md) and make sure the environment is set up correctly.
+
+
+## export.py arguments
+
+| Argument | Optional | Purpose |
+|-----------------|------------|--------------------|
+| verbose | Yes, default True | Whether to print detailed information while converting the model |
+| config_path | No | Path to the configuration file |
+
+## The config file
+
+### config YAML template
+
+```yaml
+model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
+output_folder: ./
+target_platform: RK3588
+normalize:
+ mean: [0.5,0.5,0.5]
+ std: [0.5,0.5,0.5]
+outputs: None
+```
+
+### config parameters
+* model_path: path of the source model
+* output_folder: name of the folder where the converted model is saved
+* target_platform: the device the model will run on; must be RK3588 or RK3568
+* normalize: the normalize operation executed on the NPU, configured via the std and mean parameters
+  * std: if normalization is done outside the model, set this to [1/255,1/255,1/255]
+  * mean: if normalization is done outside the model, set this to [0,0,0]
+* outputs: list of output nodes; set to None to use the model's default outputs
+
+## Converting a model
+Run the following from the repository root:
+
+```bash
+python tools/export.py --config_path=./config.yaml
+```
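+
+Under the hood the tool drives rknn-toolkit2. A rough sketch of the equivalent rknn-toolkit2 calls is shown below for orientation only; it is not the actual tools/export.py, and the mean/std values and file names are illustrative assumptions:
+
+```python
+from rknn.api import RKNN
+
+rknn = RKNN(verbose=True)
+# values here mirror the yaml config; tools/export.py may scale them differently
+rknn.config(mean_values=[[127.5, 127.5, 127.5]],
+            std_values=[[127.5, 127.5, 127.5]],
+            target_platform="rk3588")
+rknn.load_onnx(model="./portrait_pp_humansegv2_lite_256x144_pretrained.onnx")
+rknn.build(do_quantization=False)
+rknn.export_rknn("./portrait_pp_humansegv2_lite_256x144_pretrained.rknn")
+rknn.release()
+```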
+
+## Notes on exporting models
+
+* Do not export models that contain softmax or argmax; these two operators currently have bugs, so apply them outside the model (see the sketch below)
\ No newline at end of file
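+
+A minimal NumPy sketch of applying softmax/argmax on the host after inference (the `logits` array and its shape are illustrative assumptions, standing in for the raw segmentation output of the model):
+
+```python
+import numpy as np
+
+# hypothetical raw output of shape (1, num_classes, H, W)
+logits = np.random.rand(1, 19, 144, 256).astype(np.float32)
+
+# numerically stable softmax over the class axis, then argmax for the label map
+probs = np.exp(logits - logits.max(axis=1, keepdims=True))
+probs /= probs.sum(axis=1, keepdims=True)
+label_map = probs.argmax(axis=1)  # shape (1, H, W)
+```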
diff --git a/docs/cn/faq/rknpu2/install_rknn_toolkit2.md b/docs/cn/faq/rknpu2/install_rknn_toolkit2.md
new file mode 100644
index 000000000..ebebdeb6d
--- /dev/null
+++ b/docs/cn/faq/rknpu2/install_rknn_toolkit2.md
@@ -0,0 +1,49 @@
+# Installing rknn_toolkit2
+
+## Downloading rknn_toolkit2
+
+There are generally two ways to obtain rknn_toolkit2, described below:
+
+* Download from the GitHub repository
+
+  The GitHub repository provides stable releases of rknn_toolkit2:
+  ```bash
+  git clone https://github.com/rockchip-linux/rknn-toolkit2.git
+  ```
+
+* Download from Baidu Netdisk
+
+  If the stable release has a bug that blocks model deployment, a beta build of rknn_toolkit2 can be downloaded from Baidu Netdisk instead; it is installed the same way as the stable release.
+  ```text
+  Link: https://eyun.baidu.com/s/3eTDMk6Y  Password: rknn
+  ```
+
+## Installing rknn_toolkit2
+
+Installing rknn_toolkit2 involves a few dependency pitfalls, so the steps are spelled out here. Because it depends on specific package versions, it is recommended to create a fresh conda virtual environment for it.
+Installing conda itself is widely documented and skipped here; the steps below go straight to installing rknn_toolkit2.
+
+
+### Install the required system packages
+```bash
+sudo apt-get install libxslt1-dev zlib1g zlib1g-dev libglib2.0-0 \
+libsm6 libgl1-mesa-glx libprotobuf-dev gcc g++
+```
+
+### Set up the rknn_toolkit2 environment
+```bash
+# Create a virtual environment (Python 3.8, to match the cp38 wheel below)
+conda create -n rknn2 python=3.8
+conda activate rknn2
+
+# rknn_toolkit2 pins a specific numpy version, so install numpy==1.16.6 first
+pip install numpy==1.16.6
+
+# Install rknn_toolkit2-1.3.0_11912b58-cp38-cp38-linux_x86_64.whl
+cd ~/Downloads/rknn-toolkit2-master/packages
+pip install rknn_toolkit2-1.3.0_11912b58-cp38-cp38-linux_x86_64.whl
+```
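+
+A minimal check that the toolkit is importable inside the new environment (this only assumes the standard rknn-toolkit2 entry point):
+
+```python
+# run inside the rknn2 conda environment
+from rknn.api import RKNN
+
+rknn = RKNN(verbose=True)
+print("rknn_toolkit2 imported successfully")
+rknn.release()
+```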
+
+## Related documents
+- [ONNX to RKNN conversion guide](./export.md)
\ No newline at end of file
diff --git a/docs/cn/faq/rknpu2/rknpu2.md b/docs/cn/faq/rknpu2/rknpu2.md
new file mode 100644
index 000000000..1e40585d1
--- /dev/null
+++ b/docs/cn/faq/rknpu2/rknpu2.md
@@ -0,0 +1,64 @@
+# RKNPU2 Model Deployment
+
+## Converting ONNX models to RKNN models
+ONNX models cannot run directly on the NPU of Rockchip chips; they must first be converted to RKNN models. See the [conversion guide](./export.md) for the detailed workflow.
+
+## Models already supported on RKNPU2
+
+| Task | Model | Model version (tested) | Size | ONNX/RKNN supported | ONNX/RKNN latency (ms) |
+|------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-----|---------------|-----------------|
+| Detection | Picodet | [Picodet-s-npu](https://bj.bcebos.com/fastdeploy/models/rknn2/picodet_s_416_coco_npu_3588.tgz) | - | True/True | 454/177 |
+| Segmentation | PP-LiteSeg | [PP_LiteSeg_T_STDC1_cityscapes](https://bj.bcebos.com/fastdeploy/models/rknn2/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_3588.tgz) | - | True/True | 6634/5598 |
+| Segmentation | PP-HumanSegV2Lite | [portrait](https://bj.bcebos.com/fastdeploy/models/rknn2/portrait_pp_humansegv2_lite_256x144_inference_model_without_softmax_3588.tgz) | - | True/True | 456/266 |
+| Segmentation | PP-HumanSegV2Lite | [human](https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz) | - | True/True | 496/256 |
+| Face Detection | SCRFD | [SCRFD-2.5G-kps-640](https://bj.bcebos.com/fastdeploy/models/rknn2/scrfd_2.5g_bnkps_shape640x640.rknn) | - | True/True | 963/142 |
+| Face Recognition | ArcFace | [ArcFace_r18](https://bj.bcebos.com/fastdeploy/models/rknn2/new_ms1mv3_arcface_r18.rknn) | - | True/True | 600/3 |
+| Face Recognition | cosFace | [cosFace_r18](https://bj.bcebos.com/fastdeploy/models/rknn2/new_glint360k_cosface_r18.rknn) | - | True/True | 600/3 |
+
+## Using the RKNPU2 backend for inference
+
+The SCRFD model is used below to show how to run inference with the RKNPU2 backend. The changes called out in the comments are relative to the ONNX CPU version.
+
+```c++
+int infer_scrfd_npu() {
+ char model_path[] = "./model/scrfd_2.5g_bnkps_shape640x640.rknn";
+ char image_file[] = "./image/test_lite_face_detector_3.jpg";
+ auto option = fastdeploy::RuntimeOption();
+  // Change 1: the option must call UseRKNPU2
+ option.UseRKNPU2();
+
+  // Change 2: pass fastdeploy::ModelFormat::RKNN when loading the model
+ auto *model = new fastdeploy::vision::facedet::SCRFD(model_path,"",option,fastdeploy::ModelFormat::RKNN);
+ if (!model->Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return 0;
+ }
+
+  // Change 3 (optional): RKNPU2 can run normalize on the NPU and expects NHWC input.
+  // DisableNormalizeAndPermute skips the normalize and HWC->CHW steps in preprocessing.
+  // If you are using a model from the supported list above, call it before Predict.
+ model->DisableNormalizeAndPermute();
+ auto im = cv::imread(image_file);
+ auto im_bak = im.clone();
+ fastdeploy::vision::FaceDetectionResult res;
+ clock_t start = clock();
+ if (!model->Predict(&im, &res, 0.8, 0.8)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return 0;
+ }
+ clock_t end = clock();
+ double dur = (double) (end - start);
+ printf("infer_scrfd_npu use time:%f\n", (dur / CLOCKS_PER_SEC));
+ auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res);
+ cv::imwrite("scrfd_rknn_vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./scrfd_rknn_vis_result.jpg" << std::endl;
+ return 0;
+}
+```
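+
+The same three changes look as follows in Python. This is a minimal sketch based on the PaddleSeg example shipped alongside this change; the model, config and image paths are placeholders:
+
+```python
+import cv2
+import fastdeploy as fd
+
+option = fd.RuntimeOption()
+option.use_rknpu2()                       # change 1: select the RKNPU2 backend
+
+model = fd.vision.segmentation.PaddleSegModel(
+    "model.rknn", "", "deploy.yaml",
+    runtime_option=option,
+    model_format=fd.ModelFormat.RKNN)     # change 2: tell FastDeploy the model is RKNN
+
+model.disable_normalize_and_permute()     # change 3: skip normalize/HWC->CHW in preprocessing
+
+im = cv2.imread("test.jpg")
+result = model.predict(im)
+print(result)
+```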
+
+
+## Related documents
+- [RKNPU2 board environment setup](../../build_and_install/rknpu2.md)
+- [rknn_toolkit2 installation guide](./install_rknn_toolkit2.md)
+- [ONNX to RKNN conversion guide](./export.md)
+
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/README.md b/examples/vision/segmentation/paddleseg/rknpu2/README.md
new file mode 100644
index 000000000..5a96661b1
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/README.md
@@ -0,0 +1,52 @@
+# PaddleSeg Model Deployment
+
+## Model versions
+
+- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop)
+
+FastDeploy currently supports deploying the following models:
+
+- [U-Net series](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/unet/README.md)
+- [PP-LiteSeg series](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/pp_liteseg/README.md)
+- [PP-HumanSeg series](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/contrib/PP-HumanSeg/README.md)
+- [FCN series](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/fcn/README.md)
+- [DeepLabV3 series](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/deeplabv3/README.md)
+
+[Note] For **PP-Matting**, **PP-HumanMatting** and **ModNet**, please refer to [Matting model deployment](../../matting)
+
+## Preparing and converting PaddleSeg models
+
+Before deploying on RKNPU, the model has to be converted into an RKNN model. The process can be summarized as:
+* Paddle dynamic-graph model -> ONNX model -> RKNN model.
+  * For converting a Paddle dynamic-graph model to ONNX, see the [PaddleSeg model export guide](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/contrib/PP-HumanSeg).
+  * For converting the ONNX model to RKNN, follow the [conversion guide](../../../../../docs/cn/faq/rknpu2/export.md).
+    Taking PP-HumanSeg as an example, once the ONNX model is available, the RK3588 conversion steps are:
+  * Write the config.yaml file
+  ```yaml
+  model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
+  output_folder: ./
+  target_platform: RK3588
+  normalize:
+    mean: [0.5,0.5,0.5]
+    std: [0.5,0.5,0.5]
+  outputs: None
+  ```
+  * Run the conversion
+  ```bash
+  python /path/to/FastDeploy/tools/export.py --config_path=/path/to/fastdeploy/tools/rknpu2/config/ppset_config.yaml
+  ```
+
+## Download pretrained models
+
+For convenience, some models exported from PaddleSeg are provided below (exported with `--input_shape`, `--output_op none` and `--without_argmax` specified); they can be downloaded and used directly.
+
+| Task | Model | Model version (tested) | Size | ONNX/RKNN supported | ONNX/RKNN latency (ms) |
+|------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-----|---------------|-----------------|
+| Segmentation | PP-LiteSeg | [PP_LiteSeg_T_STDC1_cityscapes](https://bj.bcebos.com/fastdeploy/models/rknn2/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_3588.tgz) | - | True/True | 6634/5598 |
+| Segmentation | PP-HumanSegV2Lite | [portrait](https://bj.bcebos.com/fastdeploy/models/rknn2/portrait_pp_humansegv2_lite_256x144_inference_model_without_softmax_3588.tgz) | - | True/True | 456/266 |
+| Segmentation | PP-HumanSegV2Lite | [human](https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz) | - | True/True | 496/256 |
+
+## Detailed deployment docs
+- [RKNN deployment overview](../../../../../docs/cn/faq/rknpu2/rknpu2.md)
+- [C++ deployment](cpp)
+- [Python deployment](python)
\ No newline at end of file
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/CMakeLists.txt b/examples/vision/segmentation/paddleseg/rknpu2/cpp/CMakeLists.txt
new file mode 100644
index 000000000..28161a83d
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/cpp/CMakeLists.txt
@@ -0,0 +1,36 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
+project(rknpu_test)
+
+set(CMAKE_CXX_STANDARD 14)
+
+# Path to the downloaded and extracted FastDeploy SDK
+set(FASTDEPLOY_INSTALL_DIR "thirdpartys/fastdeploy-0.0.3")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake)
+include_directories(${FastDeploy_INCLUDE_DIRS})
+add_executable(rknpu_test infer.cc)
+target_link_libraries(rknpu_test
+ ${FastDeploy_LIBS}
+ )
+
+set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
+
+install(TARGETS rknpu_test DESTINATION ./)
+
+install(DIRECTORY model DESTINATION ./)
+install(DIRECTORY images DESTINATION ./)
+
+file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*)
+message("${FASTDEPLOY_LIBS}")
+install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib)
+
+file(GLOB ONNXRUNTIME_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/onnxruntime/lib/*)
+install(PROGRAMS ${ONNXRUNTIME_LIBS} DESTINATION lib)
+
+install(DIRECTORY ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib DESTINATION ./)
+
+file(GLOB PADDLETOONNX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddle2onnx/lib/*)
+install(PROGRAMS ${PADDLETOONNX_LIBS} DESTINATION lib)
+
+file(GLOB RKNPU2_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/*)
+install(PROGRAMS ${RKNPU2_LIBS} DESTINATION lib)
\ No newline at end of file
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/README.md b/examples/vision/segmentation/paddleseg/rknpu2/cpp/README.md
new file mode 100644
index 000000000..17defad01
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/cpp/README.md
@@ -0,0 +1,84 @@
+# PaddleSeg C++ Deployment Example
+
+This directory shows how to deploy PaddleSeg models on RKNPU2; the steps below use PP-HumanSeg as the example.
+
+Before deploying, confirm the following two points:
+
+1. The hardware and software environment meets the requirements
+2. A prebuilt deployment library has been downloaded, or FastDeploy has been built from source, matching your development environment
+
+For both points see [Building the RKNPU2 deployment library](../../../../../../docs/cn/build_and_install/rknpu2.md)
+
+## Create the basic directory layout
+
+The example consists of the following parts:
+```text
+.
+├── CMakeLists.txt
+├── build # build directory
+├── images # directory for test images
+├── infer_cpu_npu.cc
+├── infer_cpu_npu.h
+├── main.cc
+├── model # directory for model files
+└── thirdpartys # directory for the SDK
+```
+
+First create the directory structure:
+```bash
+mkdir build
+mkdir images
+mkdir model
+mkdir thirdpartys
+```
+
+## Build
+
+### Build the SDK and copy it into the thirdpartys directory
+
+Build the SDK as described in [Building the RKNPU2 deployment library](../../../../../../docs/cn/build_and_install/rknpu2.md). After the build finishes, a fastdeploy-0.0.3 directory is generated under build; move it into the thirdpartys directory.
+
+### Copy the model and config files into the model directory
+The Paddle dynamic-graph model -> Paddle static-graph model -> ONNX model conversion produces an ONNX file and a corresponding YAML config file; put the config file into the model directory.
+The converted RKNN model also has to be copied into model. A converted model is provided here; download it with the commands below (it targets RK3588; for RK3568 the [PaddleSeg RKNN model must be converted](../README.md) again).
+```bash
+cd model
+wget https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
+tar xvf human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
+cp -r ./human_pp_humansegv2_lite_192x192_pretrained_3588 ./model
+```
+
+### Prepare test images in the images directory
+```bash
+wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip
+unzip -qo images.zip
+```
+
+### Build the example
+
+```bash
+cd build
+cmake ..
+make -j8
+make install
+```
+
+## Run the example
+
+```bash
+cd ./build/install
+./rknpu_test
+```
+
+## Result
+After running, a human_pp_humansegv2_lite_npu_result.jpg file is generated under the install directory, as shown below:
+
+
+## Notes
+RKNPU expects NHWC model input, and image normalization is embedded into the model when converting to RKNN. When deploying with FastDeploy,
+call `DisableNormalizeAndPermute` (C++) or `disable_normalize_and_permute` (Python) first, to disable normalization and the layout conversion in the preprocessing stage.
+
+- [Model description](../../)
+- [Python deployment](../python)
+- [Converting the PaddleSeg RKNN model](../README.md)
\ No newline at end of file
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc b/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc
new file mode 100644
index 000000000..b379a5739
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc
@@ -0,0 +1,84 @@
+#include <iostream>
+#include <string>
+#include "fastdeploy/vision.h"
+
+void InferHumanPPHumansegv2Lite(const std::string& device = "cpu");
+
+int main() {
+ InferHumanPPHumansegv2Lite("npu");
+ return 0;
+}
+
+fastdeploy::RuntimeOption GetOption(const std::string& device) {
+ auto option = fastdeploy::RuntimeOption();
+ if (device == "npu") {
+ option.UseRKNPU2();
+ } else {
+ option.UseCpu();
+ }
+ return option;
+}
+
+fastdeploy::ModelFormat GetFormat(const std::string& device) {
+ auto format = fastdeploy::ModelFormat::ONNX;
+ if (device == "npu") {
+ format = fastdeploy::ModelFormat::RKNN;
+ } else {
+ format = fastdeploy::ModelFormat::ONNX;
+ }
+ return format;
+}
+
+std::string GetModelPath(std::string& model_path, const std::string& device) {
+ if (device == "npu") {
+ model_path += "rknn";
+ } else {
+ model_path += "onnx";
+ }
+ return model_path;
+}
+
+void InferHumanPPHumansegv2Lite(const std::string& device) {
+ std::string model_file =
+ "./model/human_pp_humansegv2_lite_192x192_pretrained_3588/"
+ "human_pp_humansegv2_lite_192x192_pretrained_3588.";
+ std::string params_file;
+ std::string config_file =
+ "./model/human_pp_humansegv2_lite_192x192_pretrained_3588/deploy.yaml";
+
+ fastdeploy::RuntimeOption option = GetOption(device);
+ fastdeploy::ModelFormat format = GetFormat(device);
+ model_file = GetModelPath(model_file, device);
+ auto model = fastdeploy::vision::segmentation::PaddleSegModel(
+ model_file, params_file, config_file, option, format);
+
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+ auto image_file =
+ "./images/portrait_heng.jpg";
+ auto im = cv::imread(image_file);
+
+ if (device == "npu") {
+ model.DisableNormalizeAndPermute();
+ }
+
+ fastdeploy::vision::SegmentationResult res;
+ clock_t start = clock();
+ if (!model.Predict(&im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+ clock_t end = clock();
+ auto dur = (double)(end - start);
+ printf("infer_human_pp_humansegv2_lite_npu use time:%f\n",
+ (dur / CLOCKS_PER_SEC));
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisSegmentation(im, res);
+ cv::imwrite("human_pp_humansegv2_lite_npu_result.jpg", vis_im);
+ std::cout
+ << "Visualized result saved in ./human_pp_humansegv2_lite_npu_result.jpg"
+ << std::endl;
+}
\ No newline at end of file
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/python/README.md b/examples/vision/segmentation/paddleseg/rknpu2/python/README.md
new file mode 100644
index 000000000..6e8eaf199
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/python/README.md
@@ -0,0 +1,44 @@
+# PaddleSeg Python Deployment Example
+
+Before deploying, confirm the following:
+
+- 1. The hardware and software environment meets the requirements; see the [FastDeploy environment requirements](../../../../../../docs/cn/build_and_install/rknpu2.md)
+
+[Note] For **PP-Matting**, **PP-HumanMatting** and **ModNet**, please refer to [Matting model deployment](../../../matting)
+
+This directory provides `infer.py` as a quick example of deploying PP-HumanSeg on RKNPU. Run the following to complete the deployment:
+
+```bash
+# Download the example code
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/examples/vision/segmentation/paddleseg/rknpu2/python
+
+# Download the model
+wget https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
+tar xvf human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
+
+# Download test images
+wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip
+unzip images.zip
+
+# Run inference
+python3 infer.py --model_file ./human_pp_humansegv2_lite_192x192_pretrained_3588/human_pp_humansegv2_lite_192x192_pretrained_3588.rknn \
+ --config_file ./human_pp_humansegv2_lite_192x192_pretrained_3588/deploy.yaml \
+ --image images/portrait_heng.jpg
+```
+
+The visualized result after running is shown below:
+
+
+
+
+
+
+## Notes
+RKNPU expects NHWC model input, and image normalization is embedded into the model when converting to RKNN. When deploying with FastDeploy,
+call `disable_normalize_and_permute` (Python) or `DisableNormalizeAndPermute` (C++) first, to disable normalization and the layout conversion in the preprocessing stage, as in the short sketch below.
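+
+A minimal sketch of the call order described above (the paths are the files downloaded earlier; `option` is a RuntimeOption with `use_rknpu2()` already set, as in infer.py):
+
+```python
+model = fd.vision.segmentation.PaddleSegModel(
+    "./human_pp_humansegv2_lite_192x192_pretrained_3588/human_pp_humansegv2_lite_192x192_pretrained_3588.rknn",
+    "",
+    "./human_pp_humansegv2_lite_192x192_pretrained_3588/deploy.yaml",
+    runtime_option=option,
+    model_format=fd.ModelFormat.RKNN)
+model.disable_normalize_and_permute()  # must be called before predict()
+result = model.predict(im)
+```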
+## Other documents
+
+- [PaddleSeg model description](..)
+- [PaddleSeg C++ deployment](../cpp)
+- [Prediction result description](../../../../../../docs/api/vision_results/)
+- [Converting the PaddleSeg RKNN model](../README.md)
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py b/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py
new file mode 100644
index 000000000..2b6034a33
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py
@@ -0,0 +1,44 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_file", required=True, help="Path of PaddleSeg model.")
+ parser.add_argument(
+ "--config_file", required=True, help="Path of PaddleSeg config.")
+ parser.add_argument(
+ "--image", type=str, required=True, help="Path of test image file.")
+ return parser.parse_args()
+
+
+def build_option(args):
+ option = fd.RuntimeOption()
+ option.use_rknpu2()
+ return option
+
+
+args = parse_arguments()
+
+# Configure the runtime and load the model
+runtime_option = build_option(args)
+model_file = args.model_file
+params_file = ""
+config_file = args.config_file
+model = fd.vision.segmentation.PaddleSegModel(
+    model_file, params_file, config_file,
+    runtime_option=runtime_option, model_format=fd.ModelFormat.RKNN)
+
+model.disable_normalize_and_permute()
+
+# Predict the segmentation result
+im = cv2.imread(args.image)
+result = model.predict(im.copy())
+print(result)
+
+# Visualize the result
+vis_im = fd.vision.vis_segmentation(im, result, weight=0.5)
+cv2.imwrite("vis_img.png", vis_im)
diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc
new file mode 100644
index 000000000..1df1efcc4
--- /dev/null
+++ b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc
@@ -0,0 +1,425 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
+
+namespace fastdeploy {
+RKNPU2Backend::~RKNPU2Backend() {
+ if(input_attrs != nullptr){
+ free(input_attrs);
+ }
+ if(output_attrs != nullptr){
+ free(output_attrs);
+ }
+}
+/***************************************************************
+ * @name GetSDKAndDeviceVersion
+ * @brief get RKNN sdk and device version
+ * @param None
+ * @return bool
+ * @note None
+ ***************************************************************/
+bool RKNPU2Backend::GetSDKAndDeviceVersion() {
+ int ret;
+ // get sdk and device version
+ ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &sdk_ver, sizeof(sdk_ver));
+ if (ret != RKNN_SUCC) {
+ printf("rknn_query fail! ret=%d\n", ret);
+ return false;
+ }
+ FDINFO << "rknn_api/rknnrt version: " << sdk_ver.api_version
+ << ", driver version: " << sdk_ver.drv_version << std::endl;
+ return true;
+}
+
+/***************************************************************
+ * @name BuildOption
+ * @brief save option
+ * @param RKNPU2BackendOption
+ * @note None
+ ***************************************************************/
+void RKNPU2Backend::BuildOption(const RKNPU2BackendOption& option) {
+ this->option_ = option;
+ // save cpu_name
+ this->option_.cpu_name = option.cpu_name;
+
+ // save context
+ this->option_.core_mask = option.core_mask;
+}
+
+/***************************************************************
+ * @name InitFromRKNN
+ * @brief Initialize RKNN model
+ * @param model_file: Binary data for the RKNN model or the path of RKNN model.
+ * params_file: None
+ * option: config
+ * @return bool
+ * @note None
+ ***************************************************************/
+bool RKNPU2Backend::InitFromRKNN(const std::string& model_file,
+ const RKNPU2BackendOption& option) {
+ // LoadModel
+ if (!this->LoadModel((char*)model_file.data())) {
+ FDERROR << "load model failed" << std::endl;
+ return false;
+ }
+
+ // GetSDKAndDeviceVersion
+ if (!this->GetSDKAndDeviceVersion()) {
+ FDERROR << "get SDK and device version failed" << std::endl;
+ return false;
+ }
+
+ // BuildOption
+ this->BuildOption(option);
+
+ // SetCoreMask if RK3588
+ if (this->option_.cpu_name == rknpu2::CpuName::RK3588) {
+ if (!this->SetCoreMask(option_.core_mask)) {
+ FDERROR << "set core mask failed" << std::endl;
+ return false;
+ }
+ }
+
+ // GetModelInputOutputInfos
+ if (!this->GetModelInputOutputInfos()) {
+ FDERROR << "get model input output infos failed" << std::endl;
+ return false;
+ }
+
+ return true;
+}
+
+/***************************************************************
+ * @name SetCoreMask
+ * @brief set NPU core for model
+ * @param core_mask: The specification of NPU core setting.
+ * @return bool
+ * @note Only support RK3588
+ ***************************************************************/
+bool RKNPU2Backend::SetCoreMask(rknpu2::CoreMask& core_mask) const {
+  int ret = rknn_set_core_mask(ctx, static_cast<rknn_core_mask>(core_mask));
+ if (ret != RKNN_SUCC) {
+ FDERROR << "rknn_set_core_mask fail! ret=" << ret << std::endl;
+ return false;
+ }
+ return true;
+}
+
+/***************************************************************
+ * @name LoadModel
+ * @brief read rknn model
+ * @param model: Binary data for the RKNN model or the path of RKNN model.
+ * @return bool
+ * @note None
+ ***************************************************************/
+bool RKNPU2Backend::LoadModel(void* model) {
+ int ret = RKNN_SUCC;
+ ret = rknn_init(&ctx, model, 0, 0, nullptr);
+ if (ret != RKNN_SUCC) {
+ FDERROR << "rknn_init fail! ret=" << ret << std::endl;
+ return false;
+ }
+ return true;
+}
+
+/***************************************************************
+ * @name GetModelInputOutputInfos
+ * @brief Get the detailed input and output infos of Model
+ * @param None
+ * @return bool
+ * @note None
+ ***************************************************************/
+bool RKNPU2Backend::GetModelInputOutputInfos() {
+ int ret = RKNN_SUCC;
+
+ // Get the number of model inputs and outputs
+ ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
+ if (ret != RKNN_SUCC) {
+ return false;
+ }
+
+ // Get detailed input parameters
+ input_attrs = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_input);
+ memset(input_attrs, 0, io_num.n_input * sizeof(rknn_tensor_attr));
+ inputs_desc_.resize(io_num.n_input);
+ for (uint32_t i = 0; i < io_num.n_input; i++) {
+ input_attrs[i].index = i;
+ // query info
+ ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]),
+ sizeof(rknn_tensor_attr));
+ if (ret != RKNN_SUCC) {
+ printf("rknn_init error! ret=%d\n", ret);
+ return false;
+ }
+ std::string temp_name = input_attrs[i].name;
+    std::vector<int> temp_shape{};
+ temp_shape.resize(input_attrs[i].n_dims);
+ for (int j = 0; j < input_attrs[i].n_dims; j++) {
+ temp_shape[j] = (int)input_attrs[i].dims[j];
+ }
+
+ FDDataType temp_dtype =
+ fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
+ input_attrs[i].type);
+ TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
+ inputs_desc_[i] = temp_input_info;
+ }
+
+ // Get detailed output parameters
+ output_attrs =
+ (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
+ memset(output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
+ outputs_desc_.resize(io_num.n_output);
+ for (uint32_t i = 0; i < io_num.n_output; i++) {
+ output_attrs[i].index = i;
+ // query info
+ ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]),
+ sizeof(rknn_tensor_attr));
+ if (ret != RKNN_SUCC) {
+ FDERROR << "rknn_query fail! ret = " << ret << std::endl;
+ return false;
+ }
+ std::string temp_name = output_attrs[i].name;
+    std::vector<int> temp_shape{};
+ temp_shape.resize(output_attrs[i].n_dims);
+ for (int j = 0; j < output_attrs[i].n_dims; j++) {
+ temp_shape[j] = (int)output_attrs[i].dims[j];
+ }
+ FDDataType temp_dtype =
+ fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
+ output_attrs[i].type);
+ TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
+ outputs_desc_[i] = temp_input_info;
+ }
+ return true;
+}
+
+/***************************************************************
+ * @name DumpTensorAttr
+ * @brief Print the attributes of a model input/output tensor
+ * @param rknn_tensor_attr
+ * @return None
+ * @note None
+ ***************************************************************/
+void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
+ printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
+ "n_elems=%d, size=%d, fmt=%s, type=%s, "
+ "qnt_type=%s, zp=%d, scale=%f\n",
+ attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
+ attr.dims[2], attr.dims[3], attr.n_elems, attr.size,
+ get_format_string(attr.fmt), get_type_string(attr.type),
+ get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale);
+}
+
+TensorInfo RKNPU2Backend::GetInputInfo(int index) {
+ FDASSERT(index < NumInputs(),
+ "The index: %d should less than the number of inputs: %d.", index,
+ NumInputs())
+ return inputs_desc_[index];
+}
+
+std::vector<TensorInfo> RKNPU2Backend::GetInputInfos() { return inputs_desc_; }
+
+TensorInfo RKNPU2Backend::GetOutputInfo(int index) {
+ FDASSERT(index < NumOutputs(),
+ "The index: %d should less than the number of outputs %d.", index,
+ NumOutputs())
+ return outputs_desc_[index];
+}
+
+std::vector<TensorInfo> RKNPU2Backend::GetOutputInfos() {
+ return outputs_desc_;
+}
+
+bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
+                          std::vector<FDTensor>* outputs) {
+ int ret = RKNN_SUCC;
+ // Judge whether the input and output size are the same
+ if (inputs.size() != inputs_desc_.size()) {
+ FDERROR << "[RKNPU2Backend] Size of the inputs(" << inputs.size()
+ << ") should keep same with the inputs of this model("
+ << inputs_desc_.size() << ")." << std::endl;
+ return false;
+ }
+
+ // the input size only can be one
+ if (inputs.size() > 1) {
+ FDERROR << "[RKNPU2Backend] Size of the inputs only support 1."
+ << std::endl;
+ return false;
+ }
+
+ // Judge whether the input and output types are the same
+ rknn_tensor_type input_type =
+ fastdeploy::RKNPU2Backend::FDDataTypeToRknnTensorType(inputs[0].dtype);
+ if (input_type != input_attrs[0].type) {
+ FDWARNING << "The input tensor type != model's inputs type."
+ << "The input_type need " << get_type_string(input_attrs[0].type)
+ << ",but inputs[0].type is " << get_type_string(input_type)
+ << std::endl;
+ }
+
+ rknn_tensor_format input_layout =
+ RKNN_TENSOR_NHWC; // RK3588 only support NHWC
+ input_attrs[0].type = input_type;
+ input_attrs[0].fmt = input_layout;
+ input_attrs[0].size = inputs[0].Nbytes();
+ input_attrs[0].size_with_stride = inputs[0].Nbytes();
+ input_attrs[0].pass_through = 0;
+
+ // create input tensor memory
+ rknn_tensor_mem* input_mems[1];
+ input_mems[0] = rknn_create_mem(ctx, inputs[0].Nbytes());
+ if (input_mems[0] == nullptr) {
+ FDERROR << "rknn_create_mem input_mems error." << std::endl;
+ return false;
+ }
+
+ // Copy input data to input tensor memory
+ uint32_t width = input_attrs[0].dims[2];
+ uint32_t stride = input_attrs[0].w_stride;
+ if (width == stride) {
+ if (inputs[0].Data() == nullptr) {
+ FDERROR << "inputs[0].Data is NULL." << std::endl;
+ return false;
+ }
+ memcpy(input_mems[0]->virt_addr, inputs[0].Data(), inputs[0].Nbytes());
+ } else {
+ FDERROR << "[RKNPU2Backend] only support width == stride." << std::endl;
+ return false;
+ }
+
+ // Create output tensor memory
+ rknn_tensor_mem* output_mems[io_num.n_output];
+ for (uint32_t i = 0; i < io_num.n_output; ++i) {
+ // Most post-processing does not support the fp16 format.
+ // The unified output here is float32
+ uint32_t output_size = output_attrs[i].n_elems * sizeof(float);
+ output_mems[i] = rknn_create_mem(ctx, output_size);
+ }
+
+ // Set input tensor memory
+ ret = rknn_set_io_mem(ctx, input_mems[0], &input_attrs[0]);
+ if (ret != RKNN_SUCC) {
+ FDERROR << "input tensor memory rknn_set_io_mem fail! ret=" << ret
+ << std::endl;
+ return false;
+ }
+
+ // Set output tensor memory
+ for (uint32_t i = 0; i < io_num.n_output; ++i) {
+ // default output type is depend on model, this requires float32 to compute top5
+ output_attrs[i].type = RKNN_TENSOR_FLOAT32;
+ ret = rknn_set_io_mem(ctx, output_mems[i], &output_attrs[i]);
+ // set output memory and attribute
+ if (ret != RKNN_SUCC) {
+ FDERROR << "output tensor memory rknn_set_io_mem fail! ret=" << ret
+ << std::endl;
+ return false;
+ }
+ }
+
+ // run rknn
+ ret = rknn_run(ctx, nullptr);
+ if (ret != RKNN_SUCC) {
+ FDERROR << "rknn run error! ret=" << ret << std::endl;
+ return false;
+ }
+ rknn_destroy_mem(ctx, input_mems[0]);
+
+ // get result
+ outputs->resize(outputs_desc_.size());
+  std::vector<int64_t> temp_shape(4);
+ for (size_t i = 0; i < outputs_desc_.size(); ++i) {
+ temp_shape.resize(outputs_desc_[i].shape.size());
+ for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) {
+ temp_shape[j] = outputs_desc_[i].shape[j];
+ }
+ (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
+ outputs_desc_[i].name);
+ memcpy((*outputs)[i].MutableData(), (float*)output_mems[i]->virt_addr, (*outputs)[i].Nbytes());
+ rknn_destroy_mem(ctx, output_mems[i]);
+ }
+
+ return true;
+}
+
+/***************************************************************
+ * @name RknnTensorTypeToFDDataType
+ * @brief Change RknnTensorType To FDDataType
+ * @param rknn_tensor_type
+ * @return FDDataType
+ * @note Most post-processing does not support the fp16 format.
+ * Therefore, if the input is FP16, the output will be FP32.
+ ***************************************************************/
+FDDataType RKNPU2Backend::RknnTensorTypeToFDDataType(rknn_tensor_type type) {
+ if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT16) {
+ return FDDataType::FP32;
+ }
+ if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT32) {
+ return FDDataType::FP32;
+ }
+ if (type == rknn_tensor_type::RKNN_TENSOR_INT8) {
+ return FDDataType::INT8;
+ }
+ if (type == rknn_tensor_type::RKNN_TENSOR_INT16) {
+ return FDDataType::INT16;
+ }
+ if (type == rknn_tensor_type::RKNN_TENSOR_INT32) {
+ return FDDataType::INT32;
+ }
+ if (type == rknn_tensor_type::RKNN_TENSOR_UINT8) {
+ return FDDataType::UINT8;
+ }
+ if (type == rknn_tensor_type::RKNN_TENSOR_BOOL) {
+ return FDDataType::BOOL;
+ }
+ FDERROR << "FDDataType don't support this type" << std::endl;
+ return FDDataType::UNKNOWN1;
+}
+
+/***************************************************************
+ * @name FDDataTypeToRknnTensorType
+ * @brief Change FDDataType To RknnTensorType
+ * @param FDDataType
+ * @return rknn_tensor_type
+ * @note None
+ ***************************************************************/
+rknn_tensor_type
+RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) {
+ if (type == FDDataType::FP16) {
+ return rknn_tensor_type::RKNN_TENSOR_FLOAT16;
+ }
+ if (type == FDDataType::FP32) {
+ return rknn_tensor_type::RKNN_TENSOR_FLOAT32;
+ }
+ if (type == FDDataType::INT8) {
+ return rknn_tensor_type::RKNN_TENSOR_INT8;
+ }
+ if (type == FDDataType::INT16) {
+ return rknn_tensor_type::RKNN_TENSOR_INT16;
+ }
+ if (type == FDDataType::INT32) {
+ return rknn_tensor_type::RKNN_TENSOR_INT32;
+ }
+ if (type == FDDataType::UINT8) {
+ return rknn_tensor_type::RKNN_TENSOR_UINT8;
+ }
+ if (type == FDDataType::BOOL) {
+ return rknn_tensor_type::RKNN_TENSOR_BOOL;
+ }
+ FDERROR << "rknn_tensor_type don't support this type" << std::endl;
+ return RKNN_TENSOR_TYPE_MAX;
+}
+} // namespace fastdeploy
\ No newline at end of file
diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h
new file mode 100644
index 000000000..68467294d
--- /dev/null
+++ b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h
@@ -0,0 +1,96 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/core/fd_tensor.h"
+#include "rknn_api.h" // NOLINT
+#include "rknpu2_config.h"
+#include <cstring>  // for memset
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace fastdeploy {
+struct RKNPU2BackendOption {
+ rknpu2::CpuName cpu_name = rknpu2::CpuName::RK3588;
+
+  // The specification of the NPU core setting. It has the following choices:
+  // RKNN_NPU_CORE_AUTO : automatic mode; the runtime selects an idle core
+  //                      inside the NPU.
+  // RKNN_NPU_CORE_0 : run on NPU core 0.
+  // RKNN_NPU_CORE_1 : run on NPU core 1.
+  // RKNN_NPU_CORE_2 : run on NPU core 2.
+  // RKNN_NPU_CORE_0_1 : run on NPU core 0 and core 1 simultaneously.
+  // RKNN_NPU_CORE_0_1_2 : run on NPU core 0, core 1 and core 2 simultaneously.
+ rknpu2::CoreMask core_mask = rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
+};
+
+class RKNPU2Backend : public BaseBackend {
+ public:
+ RKNPU2Backend() = default;
+
+ virtual ~RKNPU2Backend();
+
+ // RKNN API
+ bool LoadModel(void* model);
+
+ bool GetSDKAndDeviceVersion();
+
+ bool SetCoreMask(rknpu2::CoreMask& core_mask) const;
+
+ bool GetModelInputOutputInfos();
+
+ // BaseBackend API
+ void BuildOption(const RKNPU2BackendOption& option);
+
+ bool InitFromRKNN(const std::string& model_file,
+ const RKNPU2BackendOption& option = RKNPU2BackendOption());
+
+  int NumInputs() const override {
+    return static_cast<int>(inputs_desc_.size());
+  }
+
+  int NumOutputs() const override {
+    return static_cast<int>(outputs_desc_.size());
+  }
+
+ TensorInfo GetInputInfo(int index) override;
+ TensorInfo GetOutputInfo(int index) override;
+  std::vector<TensorInfo> GetInputInfos() override;
+  std::vector<TensorInfo> GetOutputInfos() override;
+  bool Infer(std::vector<FDTensor>& inputs,
+             std::vector<FDTensor>* outputs) override;
+
+ private:
+ // The object of rknn context.
+ rknn_context ctx{};
+ // The structure rknn_sdk_version is used to indicate the version information of the RKNN SDK.
+ rknn_sdk_version sdk_ver{};
+ // The structure rknn_input_output_num represents the number of input and output Tensor
+ rknn_input_output_num io_num{};
+ std::vector inputs_desc_;
+ std::vector outputs_desc_;
+
+ rknn_tensor_attr* input_attrs = nullptr;
+ rknn_tensor_attr* output_attrs = nullptr;
+
+ RKNPU2BackendOption option_;
+
+ static void DumpTensorAttr(rknn_tensor_attr& attr);
+ static FDDataType RknnTensorTypeToFDDataType(rknn_tensor_type type);
+ static rknn_tensor_type FDDataTypeToRknnTensorType(FDDataType type);
+};
+} // namespace fastdeploy
diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h b/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h
new file mode 100644
index 000000000..9b38b5a8b
--- /dev/null
+++ b/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h
@@ -0,0 +1,40 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef RKNPU2_CONFIG_H
+#define RKNPU2_CONFIG_H
+
+namespace fastdeploy {
+namespace rknpu2 {
+typedef enum _rknpu2_cpu_name {
+ RK356X = 0, /* run on RK356X. */
+ RK3588 = 1, /* default,run on RK3588. */
+ UNDEFINED,
+} CpuName;
+
+/*! RKNPU2 core mask for mobile device. */
+typedef enum _rknpu2_core_mask {
+  RKNN_NPU_CORE_AUTO = 0,  ///< default, automatically selects an idle NPU core.
+  RKNN_NPU_CORE_0 = 1,     ///< run on NPU core 0.
+  RKNN_NPU_CORE_1 = 2,     ///< run on NPU core 1.
+  RKNN_NPU_CORE_2 = 4,     ///< run on NPU core 2.
+  RKNN_NPU_CORE_0_1 =
+      RKNN_NPU_CORE_0 | RKNN_NPU_CORE_1,  ///< run on NPU core 0 and core 1.
+  RKNN_NPU_CORE_0_1_2 =
+      RKNN_NPU_CORE_0_1 | RKNN_NPU_CORE_2,  ///< run on NPU core 0, core 1 and core 2.
+  RKNN_NPU_CORE_UNDEFINED,
+} CoreMask;
+}  // namespace rknpu2
+} // namespace fastdeploy
+#endif //RKNPU2_CONFIG_H
diff --git a/fastdeploy/core/fd_type.cc b/fastdeploy/core/fd_type.cc
index afebfd973..45ca90a1b 100644
--- a/fastdeploy/core/fd_type.cc
+++ b/fastdeploy/core/fd_type.cc
@@ -53,6 +53,9 @@ std::string Str(const Device& d) {
case Device::GPU:
out = "Device::GPU";
break;
+ case Device::RKNPU:
+ out = "Device::RKNPU";
+ break;
case Device::IPU:
out = "Device::IPU";
break;
@@ -70,6 +73,9 @@ std::ostream& operator<<(std::ostream& out,const Device& d){
case Device::GPU:
out << "Device::GPU";
break;
+ case Device::RKNPU:
+ out << "Device::RKNPU";
+ break;
default:
out << "Device::UNKOWN";
}
diff --git a/fastdeploy/core/fd_type.h b/fastdeploy/core/fd_type.h
index e98d0b9ec..5236601b0 100644
--- a/fastdeploy/core/fd_type.h
+++ b/fastdeploy/core/fd_type.h
@@ -22,7 +22,7 @@
namespace fastdeploy {
-enum FASTDEPLOY_DECL Device { CPU, GPU, IPU };
+enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU };
FASTDEPLOY_DECL std::string Str(const Device& d);
diff --git a/fastdeploy/fastdeploy_model.cc b/fastdeploy/fastdeploy_model.cc
index b95c5bde8..c3f0641d2 100644
--- a/fastdeploy/fastdeploy_model.cc
+++ b/fastdeploy/fastdeploy_model.cc
@@ -41,6 +41,7 @@ bool FastDeployModel::InitRuntime() {
#ifndef WITH_IPU
use_ipu = false;
#endif
+ bool use_rknpu = (runtime_option.device == Device::RKNPU);
// whether the model is supported by the setted backend
bool is_supported = false;
@@ -51,6 +52,13 @@ bool FastDeployModel::InitRuntime() {
break;
}
}
+ } else if (use_rknpu) {
+ for (auto& item : valid_rknpu_backends) {
+ if (item == runtime_option.backend) {
+ is_supported = true;
+ break;
+ }
+ }
} else if(use_ipu) {
for (auto& item : valid_ipu_backends) {
if (item == runtime_option.backend) {
@@ -101,6 +109,8 @@ bool FastDeployModel::InitRuntime() {
<< std::endl;
return false;
#endif
+ } else if (runtime_option.device == Device::RKNPU) {
+ return CreateRKNPUBackend();
} else if (runtime_option.device == Device::IPU) {
#ifdef WITH_IPU
return CreateIpuBackend();
@@ -110,7 +120,7 @@ bool FastDeployModel::InitRuntime() {
return false;
#endif
}
- FDERROR << "Only support CPU/GPU now." << std::endl;
+ FDERROR << "Only support CPU/GPU/NPU now." << std::endl;
return false;
}
@@ -138,7 +148,7 @@ bool FastDeployModel::CreateCpuBackend() {
}
bool FastDeployModel::CreateGpuBackend() {
- if (valid_gpu_backends.size() == 0) {
+ if (valid_gpu_backends.empty()) {
FDERROR << "There's no valid gpu backends for model: " << ModelName()
<< std::endl;
return false;
@@ -161,6 +171,30 @@ bool FastDeployModel::CreateGpuBackend() {
return false;
}
+bool FastDeployModel::CreateRKNPUBackend() {
+ if (valid_rknpu_backends.empty()) {
+ FDERROR << "There's no valid npu backends for model: " << ModelName()
+ << std::endl;
+ return false;
+ }
+
+ for (size_t i = 0; i < valid_rknpu_backends.size(); ++i) {
+ if (!IsBackendAvailable(valid_rknpu_backends[i])) {
+ continue;
+ }
+ runtime_option.backend = valid_rknpu_backends[i];
+    runtime_ = std::unique_ptr<Runtime>(new Runtime());
+ if (!runtime_->Init(runtime_option)) {
+ return false;
+ }
+ runtime_initialized_ = true;
+ return true;
+ }
+ FDERROR << "Cannot find an available npu backend to load this model."
+ << std::endl;
+ return false;
+}
+
bool FastDeployModel::CreateIpuBackend() {
if (valid_ipu_backends.size() == 0) {
FDERROR << "There's no valid ipu backends for model: " << ModelName()
diff --git a/fastdeploy/fastdeploy_model.h b/fastdeploy/fastdeploy_model.h
index 3f5ff4c35..6a2c13032 100644
--- a/fastdeploy/fastdeploy_model.h
+++ b/fastdeploy/fastdeploy_model.h
@@ -38,6 +38,12 @@ class FASTDEPLOY_DECL FastDeployModel {
/** Model's valid ipu backends. This member defined all the ipu backends have successfully tested for the model
*/
std::vector<Backend> valid_ipu_backends = {Backend::PDINFER};
+
+
+  /** Model's valid RKNPU backends. This member defines all the RKNPU backends that have been successfully tested for the model
+   */
+  std::vector<Backend> valid_rknpu_backends = {};
+
/// Get number of inputs for this model
virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
/// Get number of outputs for this model
@@ -99,6 +105,8 @@ class FASTDEPLOY_DECL FastDeployModel {
virtual bool CreateCpuBackend();
virtual bool CreateGpuBackend();
virtual bool CreateIpuBackend();
+ virtual bool CreateRKNPUBackend();
+
bool initialized = false;
std::vector<Backend> valid_external_backends;
diff --git a/fastdeploy/pybind/main.cc.in b/fastdeploy/pybind/main.cc.in
index 222d75fd9..74fe90433 100644
--- a/fastdeploy/pybind/main.cc.in
+++ b/fastdeploy/pybind/main.cc.in
@@ -164,6 +164,9 @@ PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
m.def_submodule("text", "Text module of FastDeploy.");
BindText(text_module);
#endif
+ auto rknpu2_module =
+ m.def_submodule("rknpu2", "RKNPU2 config module of FastDeploy.");
+ BindRKNPU2Config(rknpu2_module);
}
} // namespace fastdeploy
diff --git a/fastdeploy/pybind/main.h b/fastdeploy/pybind/main.h
index 4cd21b7ea..4499b3e0f 100644
--- a/fastdeploy/pybind/main.h
+++ b/fastdeploy/pybind/main.h
@@ -39,6 +39,7 @@ void BindBackend(pybind11::module&);
void BindVision(pybind11::module&);
void BindText(pybind11::module& m);
void BindPipeline(pybind11::module& m);
+void BindRKNPU2Config(pybind11::module&);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);
diff --git a/fastdeploy/pybind/rknpu2_config_pybind.cc b/fastdeploy/pybind/rknpu2_config_pybind.cc
new file mode 100644
index 000000000..4880b2db6
--- /dev/null
+++ b/fastdeploy/pybind/rknpu2_config_pybind.cc
@@ -0,0 +1,33 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h"
+#include "fastdeploy/pybind/main.h"
+namespace fastdeploy {
+void BindRKNPU2Config(pybind11::module& m) {
+  pybind11::enum_<fastdeploy::rknpu2::CpuName>(m, "CpuName",
+                                               pybind11::arithmetic(),
+                                               "CpuName for inference.")
+ .value("RK356X", fastdeploy::rknpu2::CpuName::RK356X)
+ .value("RK3588", fastdeploy::rknpu2::CpuName::RK3588)
+ .value("UNDEFINED", fastdeploy::rknpu2::CpuName::UNDEFINED);
+  pybind11::enum_<fastdeploy::rknpu2::CoreMask>(m, "CoreMask",
+                                                pybind11::arithmetic(),
+                                                "CoreMask for inference.")
+ .value("RKNN_NPU_CORE_AUTO", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO)
+ .value("RKNN_NPU_CORE_0", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0)
+ .value("RKNN_NPU_CORE_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_1)
+ .value("RKNN_NPU_CORE_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_2)
+ .value("RKNN_NPU_CORE_0_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1)
+ .value("RKNN_NPU_CORE_0_1_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2)
+ .value("RKNN_NPU_CORE_UNDEFINED", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED);
+}
+} // namespace fastdeploy
\ No newline at end of file
diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc
index 41afbcdac..a694be970 100755
--- a/fastdeploy/pybind/runtime.cc
+++ b/fastdeploy/pybind/runtime.cc
@@ -22,6 +22,7 @@ void BindRuntime(pybind11::module& m) {
.def("set_model_path", &RuntimeOption::SetModelPath)
.def("use_gpu", &RuntimeOption::UseGpu)
.def("use_cpu", &RuntimeOption::UseCpu)
+ .def("use_rknpu2", &RuntimeOption::UseRKNPU2)
.def("set_external_stream", &RuntimeOption::SetExternalStream)
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
@@ -174,17 +175,20 @@ void BindRuntime(pybind11::module& m) {
.value("TRT", Backend::TRT)
.value("POROS", Backend::POROS)
.value("PDINFER", Backend::PDINFER)
+ .value("RKNPU2", Backend::RKNPU2)
.value("LITE", Backend::LITE);
pybind11::enum_(m, "ModelFormat", pybind11::arithmetic(),
"ModelFormat for inference.")
.value("PADDLE", ModelFormat::PADDLE)
.value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT)
+ .value("RKNN", ModelFormat::RKNN)
.value("ONNX", ModelFormat::ONNX);
pybind11::enum_(m, "Device", pybind11::arithmetic(),
"Device for inference.")
.value("CPU", Device::CPU)
.value("GPU", Device::GPU)
- .value("IPU", Device::IPU);
+ .value("IPU", Device::IPU)
+ .value("RKNPU", Device::RKNPU);
pybind11::enum_(m, "FDDataType", pybind11::arithmetic(),
"Data type of FastDeploy.")
diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc
index 31c3bd6e9..86c533f6e 100755
--- a/fastdeploy/runtime.cc
+++ b/fastdeploy/runtime.cc
@@ -41,6 +41,10 @@
#include "fastdeploy/backends/lite/lite_backend.h"
#endif
+#ifdef ENABLE_RKNPU2_BACKEND
+#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
+#endif
+
namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() {
@@ -62,6 +66,9 @@ std::vector GetAvailableBackends() {
#endif
#ifdef ENABLE_LITE_BACKEND
backends.push_back(Backend::LITE);
+#endif
+#ifdef ENABLE_RKNPU2_BACKEND
+ backends.push_back(Backend::RKNPU2);
#endif
return backends;
}
@@ -85,7 +92,9 @@ std::string Str(const Backend& b) {
return "Backend::PDINFER";
} else if (b == Backend::POROS) {
return "Backend::POROS";
- } else if (b == Backend::OPENVINO) {
+ } else if (b == Backend::RKNPU2) {
+ return "Backend::RKNPU2";
+  } else if (b == Backend::OPENVINO) {
return "Backend::OPENVINO";
} else if (b == Backend::LITE) {
return "Backend::LITE";
@@ -98,6 +107,8 @@ std::string Str(const ModelFormat& f) {
return "ModelFormat::PADDLE";
} else if (f == ModelFormat::ONNX) {
return "ModelFormat::ONNX";
+  } else if (f == ModelFormat::RKNN) {
+ return "ModelFormat::RKNN";
} else if (f == ModelFormat::TORCHSCRIPT) {
return "ModelFormat::TORCHSCRIPT";
}
@@ -113,7 +124,9 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) {
out << "Backend::PDINFER";
} else if (backend == Backend::OPENVINO) {
out << "Backend::OPENVINO";
- } else if (backend == Backend::POROS) {
+ } else if (backend == Backend::RKNPU2) {
+ out << "Backend::RKNPU2";
+  } else if (backend == Backend::POROS) {
out << "Backend::POROS";
} else if (backend == Backend::LITE) {
out << "Backend::LITE";
@@ -127,6 +140,8 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
out << "ModelFormat::PADDLE";
} else if (format == ModelFormat::ONNX) {
out << "ModelFormat::ONNX";
+ } else if (format == ModelFormat::RKNN) {
+ out << "ModelFormat::RKNN";
} else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT";
}
@@ -152,6 +167,14 @@ bool CheckModelFormat(const std::string& model_file,
<< model_file << std::endl;
return false;
}
+ } else if (model_format == ModelFormat::RKNN) {
+ if (model_file.size() < 5 ||
+ model_file.substr(model_file.size() - 5, 5) != ".rknn") {
+ FDERROR << "With model format of ModelFormat::RKNN, the model file "
+                 "should end with `.rknn`, but now it's "
+ << model_file << std::endl;
+ return false;
+ }
} else if (model_format == ModelFormat::TORCHSCRIPT) {
if (model_file.size() < 3 ||
model_file.substr(model_file.size() - 3, 3) != ".pt") {
@@ -162,7 +185,7 @@ bool CheckModelFormat(const std::string& model_file,
}
} else {
FDERROR << "Only support model format with frontend ModelFormat::PADDLE / "
- "ModelFormat::ONNX / ModelFormat::TORCHSCRIPT."
+ "ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT."
<< std::endl;
return false;
}
@@ -182,6 +205,10 @@ ModelFormat GuessModelFormat(const std::string& model_file) {
model_file.substr(model_file.size() - 3, 3) == ".pt") {
FDINFO << "Model Format: Torchscript." << std::endl;
return ModelFormat::TORCHSCRIPT;
+ } else if (model_file.size() > 5 &&
+ model_file.substr(model_file.size() - 5, 5) == ".rknn") {
+ FDINFO << "Model Format: RKNN." << std::endl;
+ return ModelFormat::RKNN;
}
FDERROR << "Cannot guess which model format you are using, please set "
@@ -223,6 +250,13 @@ void RuntimeOption::UseGpu(int gpu_id) {
void RuntimeOption::UseCpu() { device = Device::CPU; }
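+// Record the target Rockchip SoC and NPU core mask, and switch the device to
+// Device::RKNPU so that Runtime::Init selects the RKNPU2 backend.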
+void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
+ fastdeploy::rknpu2::CoreMask rknpu2_core) {
+ rknpu2_cpu_name_ = rknpu2_name;
+ rknpu2_core_mask_ = rknpu2_core;
+ device = Device::RKNPU;
+}
+
void RuntimeOption::SetExternalStream(void* external_stream) {
external_stream_ = external_stream;
}
@@ -234,7 +268,8 @@ void RuntimeOption::SetCpuThreadNum(int thread_num) {
void RuntimeOption::SetOrtGraphOptLevel(int level) {
std::vector<int> supported_level{-1, 0, 1, 2};
- auto valid_level = std::find(supported_level.begin(), supported_level.end(), level) != supported_level.end();
+ auto valid_level = std::find(supported_level.begin(), supported_level.end(),
+ level) != supported_level.end();
FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
ort_graph_opt_level = level;
}
@@ -321,18 +356,16 @@ void RuntimeOption::EnableLiteFP16() {
lite_enable_fp16 = true;
}
-void RuntimeOption::DisableLiteFP16() {
- lite_enable_fp16 = false;
+void RuntimeOption::DisableLiteFP16() {
+ lite_enable_fp16 = false;
}
-
void RuntimeOption::EnableLiteInt8() {
lite_enable_int8 = true;
}
-void RuntimeOption::DisableLiteInt8() {
- lite_enable_int8 = false;
+void RuntimeOption::DisableLiteInt8() {
+ lite_enable_int8 = false;
}
-
void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
lite_power_mode = mode;
}
@@ -406,7 +439,7 @@ bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
"ENABLE_POROS_BACKEND=ON.");
#endif
return true;
-}
+}
void RuntimeOption::EnablePaddleTrtCollectShape() {
pd_collect_shape = true;
@@ -454,6 +487,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
option.backend = Backend::POROS;
} else if (IsBackendAvailable(Backend::OPENVINO)) {
option.backend = Backend::OPENVINO;
+ } else if (IsBackendAvailable(Backend::RKNPU2)) {
+ option.backend = Backend::RKNPU2;
} else {
FDERROR << "Please define backend in RuntimeOption, current it's "
"Backend::UNKNOWN."
@@ -506,6 +541,13 @@ bool Runtime::Init(const RuntimeOption& _option) {
CreateLiteBackend();
FDINFO << "Runtime initialized with Backend::LITE in " << Str(option.device)
<< "." << std::endl;
+ } else if (option.backend == Backend::RKNPU2) {
+ FDASSERT(option.device == Device::RKNPU,
+             "Backend::RKNPU2 only supports Device::RKNPU");
+ CreateRKNPU2Backend();
+
+ FDINFO << "Runtime initialized with Backend::RKNPU2 in "
+ << Str(option.device) << "." << std::endl;
} else {
FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
@@ -720,4 +762,21 @@ void Runtime::CreateLiteBackend() {
#endif
}
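+// Create the RKNPU2 backend: forward the SoC/core-mask options, require a
+// ModelFormat::RKNN model, and load the pre-compiled .rknn file.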
+void Runtime::CreateRKNPU2Backend() {
+#ifdef ENABLE_RKNPU2_BACKEND
+ auto rknpu2_option = RKNPU2BackendOption();
+ rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
+ rknpu2_option.core_mask = option.rknpu2_core_mask_;
+ FDASSERT(option.model_format == ModelFormat::RKNN,
+           "RKNPU2Backend only supports model format of ModelFormat::RKNN");
+  backend_ = utils::make_unique<RKNPU2Backend>();
+  auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
+ FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
+           "Load model from RKNN file failed while initializing RKNPU2Backend.");
+#else
+  FDASSERT(false, "RKNPU2Backend is not available, please compile with "
+ "ENABLE_RKNPU2_BACKEND=ON.");
+#endif
+}
+
} // namespace fastdeploy
diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h
index 3d432909b..634c1f633 100755
--- a/fastdeploy/runtime.h
+++ b/fastdeploy/runtime.h
@@ -20,12 +20,13 @@
#pragma once
+#include
#include