[Backend] Add RKNPU2 backend support (#456)

* 10-29/14:05
* Add the cmake files
* Add the rknpu2 backend

* 10-29/14:43
* Add RKNPU to the Runtime fd_type code

* 10-29/15:02
* Add the ppseg RKNPU2 inference code

* 10-29/15:46
* Add the ppseg RKNPU2 C++ example code

* 10-29/15:51
* Add the README documentation

* 10-29/15:51
* Rename some comments and variables as requested

* 10-29/15:51
* Fix a bug where some code in the .cc files still used the old function names after the rename

* 10-29/22:32
* Str(Device::NPU) now outputs NPU instead of UNKNOWN
* Fix the comment format in the runtime files
* Add ENABLE_RKNPU2_BACKEND to the Building Summary output
* Add rknpu2 support to pybind
* Add the Python build option
* Add the PPSeg Python code
* Add and update various docs

* 10-30/14:11
* Try to fix the errors produced when building with CUDA

* 10-30/19:27
* Change the hierarchy of CpuName and CoreMask
* Change the hierarchy of the ppseg rknn inference code
* Images are now downloaded from the network

* 10-30/19:39
* Update docs

* 10-30/19:39
* Update docs
* Update the function naming in the ppseg rknpu2 example
* Merge the ppseg rknpu2 example into a single .cc file
* Fix a logic error in the disable_normalize_and_permute part
* Remove unused parameters from the rknpu2 initialization

* 10-30/19:39
* Try to reset the Python code

* 10-30/10:16
* rknpu2_config.h no longer includes the rknn_api header, to avoid import errors

* 10-31/14:31
* Update pybind to support the latest rknpu2 backend
* Support ppseg Python inference again
* Move the hierarchy of cpuname and coremask

* 10-31/15:35
* Try to fix the rknpu2 import error

* 10-31/19:00
* Add the RKNPU2 model export code and its documentation
* Fix a large number of documentation errors

* 10-31/19:00
* RKNN2_TARGET_SOC no longer needs to be set again after the fastdeploy repository has been built

* 10-31/19:26
* Fix some incorrect docs

* 10-31/19:26
* Restore parts that were deleted by mistake
* Fix various incorrect docs
* Fix the message printed by FastDeploy.cmake when RKNN2_TARGET_SOC is set incorrectly
* Remove the Chinese comments remaining in rknpu2_backend.cc

* 10-31/20:45
* Remove unused comments

* 10-31/20:45
* Rename Device::NPU to Device::RKNPU as requested; the hardware will share valid_hardware_backends
* Remove unused comments and debug code

* 11-01/09:45
* Update variable naming

* 11-01/10:16
* Update some docs and the function naming

Co-authored-by: Jason <jiangjiajun@baidu.com>
Author: Zheng_Bicheng
Date: 2022-11-01 11:14:05 +08:00
Committed by: GitHub
Parent: bb00e0757e
Commit: 4ffcfbe726
37 changed files with 1567 additions and 74 deletions

View File

@@ -58,6 +58,7 @@ option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF)
option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF)
option(ENABLE_POROS_BACKEND "Whether to enable poros backend." OFF)
option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF)
option(ENABLE_RKNPU2_BACKEND "Whether to enable RKNPU2 backend." OFF)
option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
option(ENABLE_VISION "Whether to enable vision models usage." OFF)
option(ENABLE_TEXT "Whether to enable text models usage." OFF)
@@ -164,13 +165,14 @@ file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fas
file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc)
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp)
file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc)
file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu/rknpu2/*.cc)
file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc)
file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pipeline/*.cc)
file(GLOB_RECURSE DEPLOY_VISION_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cu)
file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc)
file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc)
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${DEPLOY_PIPELINE_SRCS})
list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS})
set(DEPEND_LIBS "")
@@ -227,6 +229,13 @@ if(ENABLE_OPENVINO_BACKEND)
include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake)
endif()
if(ENABLE_RKNPU2_BACKEND)
add_definitions(-DENABLE_RKNPU2_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_RKNPU2_SRCS})
include(${PROJECT_SOURCE_DIR}/cmake/rknpu2.cmake)
list(APPEND DEPEND_LIBS ${RKNN_RT_LIB})
endif()
if(ENABLE_POROS_BACKEND)
set(CMAKE_CXX_STANDARD 14)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)

View File

@@ -2,6 +2,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.8)
set(WITH_GPU @WITH_GPU@)
set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
set(ENABLE_RKNPU2_BACKEND @ENABLE_RKNPU2_BACKEND@)
set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@)
set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
@@ -27,6 +28,7 @@ set(LIBRARY_NAME @LIBRARY_NAME@)
set(OPENCV_DIRECTORY "@OPENCV_DIRECTORY@")
set(ORT_DIRECTORY "@ORT_DIRECTORY@")
set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
set(FASTDEPLOY_LIBS "")
set(FASTDEPLOY_INCS "")
@@ -88,6 +90,18 @@ if(ENABLE_OPENVINO_BACKEND)
list(APPEND FASTDEPLOY_LIBS ${OPENVINO_LIBS})
endif()
if(ENABLE_RKNPU2_BACKEND)
if(RKNN2_TARGET_SOC STREQUAL "RK356X")
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK356X/lib/librknn_api.so)
elseif (RKNN2_TARGET_SOC STREQUAL "RK3588")
set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/librknn_api.so)
else ()
message(FATAL_ERROR "RKNN2_TARGET_SOC is not set, ref value: RK356X or RK3588")
endif()
message(STATUS "The path of RKNPU2 is ${RKNPU2_LIB}.")
list(APPEND FASTDEPLOY_LIBS ${RKNPU2_LIB})
endif()
if(ENABLE_LITE_BACKEND)
set(LITE_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${PADDLELITE_FILENAME})
if(ANDROID)
@@ -234,6 +248,7 @@ message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}") message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
message(STATUS " WITH_GPU : ${WITH_GPU}") message(STATUS " WITH_GPU : ${WITH_GPU}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}") message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}") message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}") message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}") message(STATUS " ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")

cmake/rknpu2.cmake (new file, 26 lines)
View File

@@ -0,0 +1,26 @@
# get RKNPU2_URL
set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
set(RKNPU2_VERSION "1.4.0")
set(RKNPU2_FILE "rknpu2_runtime-linux-x64-${RKNPU2_VERSION}.tgz")
set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}")
# download_and_decompress
download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE} ${THIRD_PARTY_PATH}/install/)
# set path
set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
else ()
message(FATAL_ERROR "[rknpu2.cmake] Only support build rknpu2 in Linux")
endif ()
if (EXISTS ${RKNPU_RUNTIME_PATH})
set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so)
include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include)
else ()
message(FATAL_ERROR "[rknpu2.cmake] download_and_decompress rknpu2_runtime error")
endif ()

View File

@@ -31,6 +31,7 @@ function(fastdeploy_summary)
message(STATUS " FastDeploy version : ${FASTDEPLOY_VERSION}") message(STATUS " FastDeploy version : ${FASTDEPLOY_VERSION}")
message(STATUS " Paddle2ONNX version : ${PADDLE2ONNX_VERSION}") message(STATUS " Paddle2ONNX version : ${PADDLE2ONNX_VERSION}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}") message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}") message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}") message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}") message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")

View File

@@ -10,15 +10,17 @@
## FastDeploy build options
| Option | Description |
|:------------------------|:--------------------------------------------------------------------------|
| ENABLE_ORT_BACKEND | Default OFF; whether to build in the ONNX Runtime backend (recommended ON for CPU/GPU) |
| ENABLE_PADDLE_BACKEND | Default OFF; whether to build in the Paddle Inference backend (recommended ON for CPU/GPU) |
| ENABLE_LITE_BACKEND | Default OFF; whether to build in the Paddle Lite backend (must be ON when building the Android library) |
| ENABLE_RKNPU2_BACKEND | Default OFF; whether to build in the RKNPU2 backend (recommended ON on RK3588/RK3568/RK3566) |
| ENABLE_TRT_BACKEND | Default OFF; whether to build in the TensorRT backend (recommended ON for GPU) |
| ENABLE_OPENVINO_BACKEND | Default OFF; whether to build in the OpenVINO backend (recommended ON for CPU) |
| ENABLE_VISION | Default OFF; whether to build the vision model deployment module |
| ENABLE_TEXT | Default OFF; whether to build the text (NLP) model deployment module |
| WITH_GPU | Default OFF; must be ON when deploying on GPU |
| RKNN2_TARGET_SOC | Only used when ENABLE_RKNPU2_BACKEND is ON. No default value; must be set to RK3588 or RK356X, otherwise the build fails |
| CUDA_DIRECTORY | Default /usr/local/cuda; the CUDA (>=11.2) path used when deploying on GPU |
| TRT_DIRECTORY | Required when the TensorRT backend is enabled; specifies the TensorRT (>=8.4) path |
| ORT_DIRECTORY | When the ONNX Runtime backend is enabled, specifies a local ONNX Runtime library; if unset, the build downloads ONNX Runtime automatically |
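As an illustration, a minimal configure command that exercises the RKNPU2 options above might look like the following sketch (the install prefix is only an example; the full command appears in the RKNPU2 build guide further down in this change):
```bash
cmake .. -DENABLE_RKNPU2_BACKEND=ON \
         -DRKNN2_TARGET_SOC=RK3588 \
         -DENABLE_VISION=ON \
         -DCMAKE_INSTALL_PREFIX=${PWD}/installed_fastdeploy
make -j8 && make install
```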

View File

@@ -0,0 +1,102 @@
# Building the RK2-generation NPU deployment library
## Preface
FastDeploy already has preliminary support for RKNPU2 deployment. If you run into bugs while using it, please report them via Issues.
## Introduction
FastDeploy currently supports the following backend engines on RK platforms:
| Backend | Platform | Supported model format | Notes |
|:------------------|:---------------------|:-------|:-------------------------------------------|
| ONNX&nbsp;Runtime | RK356X <br> RK3588 | ONNX | Controlled by the build switch `ENABLE_ORT_BACKEND`, ON or OFF; OFF by default |
| RKNPU2 | RK356X <br> RK3588 | RKNN | Controlled by the build switch `ENABLE_RKNPU2_BACKEND`, ON or OFF; OFF by default |
## Building and installing the C++ SDK
RKNPU2 can only be built on Linux; the tutorial below assumes a Linux environment.
### Update the driver and install the build environment
Before running any code, the latest RKNPU driver needs to be installed; the driver is currently at version 1.4.0. To simplify installation, a quick-install script is provided so that installation is a single step.
**Method 1: install via the script**
```bash
# Download and unpack rknpu2_device_install_1.4.0
wget https://bj.bcebos.com/fastdeploy/third_libs/rknpu2_device_install_1.4.0.zip
unzip rknpu2_device_install_1.4.0.zip
cd rknpu2_device_install_1.4.0
# For RK3588, run:
sudo rknn_install_rk3588.sh
# For RK356X, run:
sudo rknn_install_rk356X.sh
```
**Method 2: install via gitee**
```bash
# Install the required packages
sudo apt update -y
sudo apt install -y python3
sudo apt install -y python3-dev
sudo apt install -y python3-pip
sudo apt install -y gcc
sudo apt install -y python3-opencv
sudo apt install -y python3-numpy
sudo apt install -y cmake
# Download rknpu2
# For RK3588, run:
git clone https://gitee.com/mirrors_rockchip-linux/rknpu2.git
sudo cp ./rknpu2/runtime/RK3588/Linux/librknn_api/aarch64/* /usr/lib
sudo cp ./rknpu2/runtime/RK3588/Linux/rknn_server/aarch64/usr/bin/* /usr/bin/
# For RK356X, run:
git clone https://gitee.com/mirrors_rockchip-linux/rknpu2.git
sudo cp ./rknpu2/runtime/RK356X/Linux/librknn_api/aarch64/* /usr/lib
sudo cp ./rknpu2/runtime/RK356X/Linux/rknn_server/aarch64/usr/bin/* /usr/bin/
```
### Build the C++ SDK
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
# See the README for the full list of build options; only the key ones are described here
# -DENABLE_ORT_BACKEND: whether to enable the ONNX backend, OFF by default
# -DENABLE_RKNPU2_BACKEND: whether to enable the RKNPU backend, OFF by default
# -DRKNN2_TARGET_SOC: the SoC of the target board; must be RK356X or RK3588 (case sensitive)
cmake .. -DENABLE_ORT_BACKEND=ON \
-DENABLE_RKNPU2_BACKEND=ON \
-DENABLE_VISION=ON \
-DRKNN2_TARGET_SOC=RK3588 \
-DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3
make -j8
make install
```
### Build the Python SDK
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
cd python
export ENABLE_ORT_BACKEND=ON
export ENABLE_RKNPU2_BACKEND=ON
export ENABLE_VISION=ON
export RKNN2_TARGET_SOC=RK3588
python3 setup.py build
python3 setup.py bdist_wheel
cd dist
pip3 install fastdeploy_python-0.0.0-cp39-cp39-linux_aarch64.whl
```
## Deploying models
See the [RKNPU2 model deployment tutorial](../faq/rknpu2/rknpu2.md)

View File

@@ -0,0 +1,48 @@
# Model export guide
## Introduction
FastDeploy already provides a simple integration of the onnx -> rknn conversion. This tutorial uses tools/export.py to export models; a yaml configuration file has to be written before exporting.
Before converting, please check that the environment has been set up successfully according to the [rknn_toolkit2 installation guide](./install_rknn_toolkit2.md).
## export.py parameters
| Parameter | May be omitted | Purpose |
|-----------------|------------|--------------------|
| verbose | Yes, defaults to True | Whether to print detailed information while converting the model |
| config_path | No | Path of the configuration file |
## config file
### config yaml template
```yaml
model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
output_folder: ./
target_platform: RK3588
normalize:
  mean: [0.5,0.5,0.5]
  std: [0.5,0.5,0.5]
outputs: None
```
### config parameters
* model_path: path of the model
* output_folder: folder the converted model is written to
* target_platform: the device the model will run on; must be RK3588 or RK3568
* normalize: the normalize operation configured on the NPU, with two parameters, std and mean
  * std: if normalization is done externally, set this to [1/255,1/255,1/255]
  * mean: if normalization is done externally, set this to [0,0,0]
* outputs: list of output nodes; set to None to use the default output nodes
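For example, if normalization is handled in external preprocessing code instead of on the NPU, a config following the std/mean notes above could look like this sketch (the model path is reused from the template):
```yaml
model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
output_folder: ./
target_platform: RK3588
normalize:
  mean: [0,0,0]
  std: [1/255,1/255,1/255]
outputs: None
```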
## How to convert a model
Run the following from the repository root:
```bash
python tools/export.py --config_path=./config.yaml
```
## Notes on model export
* Do not export models that contain softmax or argmax; these two operators have bugs, so run them outside the model instead

View File

@@ -0,0 +1,49 @@
# Installing the rknn_toolkit2 repository
## Downloading rknn_toolkit2
There are generally two ways to download rknn_toolkit2:
* From the github repository
The github repository provides stable releases of rknn_toolkit2:
```bash
git clone https://github.com/rockchip-linux/rknn-toolkit2.git
```
* From Baidu Netdisk
Sometimes a stable release of rknn_toolkit2 has bugs that keep it from meeting the needs of model deployment; in that case a beta version of rknn_toolkit2 can be downloaded from Baidu Netdisk and installed in the same way as a stable release:
```text
Link: https://eyun.baidu.com/s/3eTDMk6Y  Password: rknn
```
## Installing rknn_toolkit2
Installing rknn_toolkit2 involves some dependency issues, so this section explains how to install it. Because rknn_toolkit2 depends on several specific packages, it is recommended to create a new conda virtual environment for the installation.
How to install conda is easy to find online and is skipped here; the steps below go straight to installing rknn_toolkit2.
### Install the required system packages
```bash
sudo apt-get install libxslt1-dev zlib1g zlib1g-dev libglib2.0-0 \
libsm6 libgl1-mesa-glx libprotobuf-dev gcc g++
```
### Install the rknn_toolkit2 environment
```bash
# Create the virtual environment
conda create -n rknn2 python=3.6
conda activate rknn2
# rknn_toolkit2 depends on a specific numpy version, so install numpy==1.16.6 first
pip install numpy==1.16.6
# Install rknn_toolkit2-1.3.0_11912b58-cp38-cp38-linux_x86_64.whl
cd ~/Downloads/rknn-toolkit2-master/packages
pip install rknn_toolkit2-1.3.0_11912b58-cp38-cp38-linux_x86_64.whl
```
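A quick way to sanity-check the installation is to import the toolkit inside the conda environment created above; a minimal sketch (the wheel installs the `rknn` package):
```python
# Run inside the rknn2 conda environment.
from rknn.api import RKNN  # the import fails if the wheel did not install correctly

rknn = RKNN(verbose=True)  # constructing the object also confirms the bundled libraries load
print("rknn_toolkit2 is available")
rknn.release()
```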
## Other docs
- [onnx to rknn conversion guide](./export.md)

View File

@@ -0,0 +1,64 @@
# RKNPU2 model deployment
## Converting an ONNX model to an RKNN model
An ONNX model cannot call the NPU in an RK chip directly; it has to be converted to an RKNN model first. For the exact workflow, see the [conversion guide](./export.md)
## Models already supported on RKNPU2
| Task | Model | Model version (versions that have been tested) | Size | Supported on ONNX/RKNN | ONNX/RKNN latency (ms) |
|------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-----|---------------|-----------------|
| Detection | Picodet | [Picodet-s-npu](https://bj.bcebos.com/fastdeploy/models/rknn2/picodet_s_416_coco_npu_3588.tgz) | - | True/True | 454/177 |
| Segmentation | PP-LiteSeg | [PP_LiteSeg_T_STDC1_cityscapes](https://bj.bcebos.com/fastdeploy/models/rknn2/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_3588.tgz) | - | True/True | 6634/5598 |
| Segmentation | PP-HumanSegV2Lite | [portrait](https://bj.bcebos.com/fastdeploy/models/rknn2/portrait_pp_humansegv2_lite_256x144_inference_model_without_softmax_3588.tgz) | - | True/True | 456/266 |
| Segmentation | PP-HumanSegV2Lite | [human](https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz) | - | True/True | 496/256 |
| Face Detection | SCRFD | [SCRFD-2.5G-kps-640](https://bj.bcebos.com/fastdeploy/models/rknn2/scrfd_2.5g_bnkps_shape640x640.rknn) | - | True/True | 963/142 |
| Face Recognition | ArcFace | [ArcFace_r18](https://bj.bcebos.com/fastdeploy/models/rknn2/new_ms1mv3_arcface_r18.rknn) | - | True/True | 600/3 |
| Face Recognition | cosFace | [cosFace_r18](https://bj.bcebos.com/fastdeploy/models/rknn2/new_glint360k_cosface_r18.rknn) | - | True/True | 600/3 |
## RKNPU2 backend inference tutorial
The SCRFD model is used below as an example of how to run inference with the RKNPU2 backend. The changes called out in the comments are relative to the ONNX CPU version.
```c++
int infer_scrfd_npu() {
  char model_path[] = "./model/scrfd_2.5g_bnkps_shape640x640.rknn";
  char image_file[] = "./image/test_lite_face_detector_3.jpg";
  auto option = fastdeploy::RuntimeOption();
  // Change 1: the option has to call UseRKNPU2
  option.UseRKNPU2();
  // Change 2: pass fastdeploy::ModelFormat::RKNN when loading the model
  auto *model = new fastdeploy::vision::facedet::SCRFD(model_path,"",option,fastdeploy::ModelFormat::RKNN);
  if (!model->Initialized()) {
    std::cerr << "Failed to initialize." << std::endl;
    return 0;
  }
  // Change 3 (optional): RKNPU2 can run normalize on the NPU and takes NHWC input.
  // DisableNormalizeAndPermute skips the normalize and HWC-to-CHW steps during preprocessing.
  // If you are using a model from the supported-model list above, call it before Predict.
  model->DisableNormalizeAndPermute();
  auto im = cv::imread(image_file);
  auto im_bak = im.clone();
  fastdeploy::vision::FaceDetectionResult res;
  clock_t start = clock();
  if (!model->Predict(&im, &res, 0.8, 0.8)) {
    std::cerr << "Failed to predict." << std::endl;
    return 0;
  }
  clock_t end = clock();
  double dur = (double) (end - start);
  printf("infer_scrfd_npu use time:%f\n", (dur / CLOCKS_PER_SEC));
  auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res);
  cv::imwrite("scrfd_rknn_vis_result.jpg", vis_im);
  std::cout << "Visualized result saved in ./scrfd_rknn_vis_result.jpg" << std::endl;
  return 0;
}
```
## Other related docs
- [Setting up the rknpu2 board environment](../../build_and_install/rknpu2.md)
- [rknn_toolkit2 installation guide](./install_rknn_toolkit2.md)
- [onnx to rknn conversion guide](./export.md)

View File

@@ -0,0 +1,52 @@
# PaddleSeg model deployment
## Model version notes
- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop)
FastDeploy currently supports deployment of the following models:
- [U-Net models](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/unet/README.md)
- [PP-LiteSeg models](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/pp_liteseg/README.md)
- [PP-HumanSeg models](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/contrib/PP-HumanSeg/README.md)
- [FCN models](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/fcn/README.md)
- [DeepLabV3 models](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/deeplabv3/README.md)
[Note] If you are deploying **PP-Matting**, **PP-HumanMatting**, or **ModNet**, please refer to [Matting model deployment](../../matting)
## Preparing and converting a PaddleSeg deployment model
Before deploying on RKNPU, the model has to be converted into an RKNN model. The process can generally be reduced to the following steps:
* Paddle dynamic-graph model -> ONNX model -> RKNN model.
* For converting a Paddle dynamic-graph model to an ONNX model, see ([PaddleSeg model export notes](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/contrib/PP-HumanSeg)).
* For converting an ONNX model to an RKNN model, follow the [conversion guide](../../../../../docs/cn/faq/rknpu2/export.md).
Taking PPHumanSeg as an example, once the ONNX model is available, the steps to convert it for RK3588 are as follows:
* Write a config.yaml file:
```yaml
model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
output_folder: ./
target_platform: RK3588
normalize:
  mean: [0.5,0.5,0.5]
  std: [0.5,0.5,0.5]
outputs: None
```
* Run the conversion script:
```bash
python /path/to/fastDeploy/tools/export.py --config_path=/path/to/fastdeploy/tools/rknpu2/config/ppset_config.yaml
```
## Download pretrained models
For developers' convenience, some models exported from PaddleSeg are provided below (exported with `--input_shape`, `--output_op none`, and `--without_argmax` specified); they can be downloaded and used directly.
| Task | Model | Model version (versions that have been tested) | Size | Supported on ONNX/RKNN | ONNX/RKNN latency (ms) |
|------------------|-------------------|--------------------------------------------------------------------------------------------------------------------------------------------|-----|---------------|-----------------|
| Segmentation | PP-LiteSeg | [PP_LiteSeg_T_STDC1_cityscapes](https://bj.bcebos.com/fastdeploy/models/rknn2/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_3588.tgz) | - | True/True | 6634/5598 |
| Segmentation | PP-HumanSegV2Lite | [portrait](https://bj.bcebos.com/fastdeploy/models/rknn2/portrait_pp_humansegv2_lite_256x144_inference_model_without_softmax_3588.tgz) | - | True/True | 456/266 |
| Segmentation | PP-HumanSegV2Lite | [human](https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz) | - | True/True | 496/256 |
## Detailed deployment docs
- [RKNN overall deployment tutorial](../../../../../docs/cn/faq/rknpu2.md)
- [C++ deployment](cpp)
- [Python deployment](python)

View File

@@ -0,0 +1,36 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
project(rknpu_test)
set(CMAKE_CXX_STANDARD 14)
# Path of the downloaded and extracted fastdeploy library
set(FASTDEPLOY_INSTALL_DIR "thirdpartys/fastdeploy-0.0.3")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake)
include_directories(${FastDeploy_INCLUDE_DIRS})
add_executable(rknpu_test infer.cc)
target_link_libraries(rknpu_test
${FastDeploy_LIBS}
)
set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
install(TARGETS rknpu_test DESTINATION ./)
install(DIRECTORY model DESTINATION ./)
install(DIRECTORY images DESTINATION ./)
file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*)
message("${FASTDEPLOY_LIBS}")
install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib)
file(GLOB ONNXRUNTIME_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/onnxruntime/lib/*)
install(PROGRAMS ${ONNXRUNTIME_LIBS} DESTINATION lib)
install(DIRECTORY ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib DESTINATION ./)
file(GLOB PADDLETOONNX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddle2onnx/lib/*)
install(PROGRAMS ${PADDLETOONNX_LIBS} DESTINATION lib)
file(GLOB RKNPU2_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/*)
install(PROGRAMS ${RKNPU2_LIBS} DESTINATION lib)

View File

@@ -0,0 +1,84 @@
# PaddleSeg C++ deployment example
This directory shows how to deploy PaddleSeg models on RKNPU2; the walkthrough below uses PPHumanSeg as the example.
Before deploying, confirm the following two steps:
1. The hardware and software environment meets the requirements
2. Download a prebuilt deployment library for your environment, or build the FastDeploy repository from source
For both steps, see [Building the RK2-generation NPU deployment library](../../../../../../docs/cn/build_and_install/rknpu2.md).
## Creating the basic directory layout
The example consists of the following parts:
```text
.
├── CMakeLists.txt
├── build # build folder
├── image # folder holding the test images
├── infer_cpu_npu.cc
├── infer_cpu_npu.h
├── main.cc
├── model # folder holding the model files
└── thirdpartys # folder holding the SDK
```
First create the directory structure:
```bash
mkdir build
mkdir images
mkdir model
mkdir thirdpartys
```
## Building
### Build the SDK and copy it into the thirdpartys folder
Follow [Building the RK2-generation NPU deployment library](../../../../../../docs/cn/build_and_install/rknpu2.md) to build the SDK. After the build, a fastdeploy-0.0.3 directory is generated under the build directory; move it into the thirdpartys directory.
### Copy the model and config files into the model folder
During the Paddle dynamic-graph model -> Paddle static-graph model -> ONNX model conversion, an ONNX file and a matching yaml config file are generated; put the config file into the model folder.
The model file converted to RKNN also needs to be copied into model. A converted file is provided; download and unpack it with the commands below (the model file targets RK3588; for RK3568 you need to [convert the PPSeg RKNN model](../README.md) again).
```bash
cd model
wget https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
tar xvf human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
cp -r ./human_pp_humansegv2_lite_192x192_pretrained_3588 ./model
```
### Put test images into the image folder
```bash
wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip
unzip -qo images.zip
```
### Build the example
```bash
cd build
cmake ..
make -j8
make install
```
## Running the example
```bash
cd ./build/install
./rknpu_test
```
## Result
After running, a human_pp_humansegv2_lite_npu_result.jpg file is generated in the install folder, as shown below:
![](https://user-images.githubusercontent.com/58363586/198875853-72821ad1-d4f7-41e3-b616-bef43027de3c.jpg)
## Notes
RKNPU expects model inputs in NHWC format, and the image normalization is folded into the model when it is converted to RKNN. Therefore, when deploying with FastDeploy, call `DisableNormalizeAndPermute` (C++) or `disable_normalize_and_permute` (Python) before prediction to disable normalization and the layout conversion in the preprocessing stage.
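A minimal sketch of that call order in C++, matching the full example in infer.cc below (model and config paths are illustrative):
```c++
auto option = fastdeploy::RuntimeOption();
option.UseRKNPU2();  // run on the RKNPU2 backend
auto model = fastdeploy::vision::segmentation::PaddleSegModel(
    "model.rknn", "", "deploy.yaml", option, fastdeploy::ModelFormat::RKNN);
model.DisableNormalizeAndPermute();  // normalize/permute are already folded into the RKNN model
// ... then call model.Predict() as usual
```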
- [Model description](../../)
- [Python deployment](../python)
- [PPSeg RKNN model conversion guide](../README.md)

View File

@@ -0,0 +1,84 @@
#include <iostream>
#include <string>
#include "fastdeploy/vision.h"
void InferHumanPPHumansegv2Lite(const std::string& device = "cpu");
int main() {
InferHumanPPHumansegv2Lite("npu");
return 0;
}
fastdeploy::RuntimeOption GetOption(const std::string& device) {
auto option = fastdeploy::RuntimeOption();
if (device == "npu") {
option.UseRKNPU2();
} else {
option.UseCpu();
}
return option;
}
fastdeploy::ModelFormat GetFormat(const std::string& device) {
auto format = fastdeploy::ModelFormat::ONNX;
if (device == "npu") {
format = fastdeploy::ModelFormat::RKNN;
} else {
format = fastdeploy::ModelFormat::ONNX;
}
return format;
}
std::string GetModelPath(std::string& model_path, const std::string& device) {
if (device == "npu") {
model_path += "rknn";
} else {
model_path += "onnx";
}
return model_path;
}
void InferHumanPPHumansegv2Lite(const std::string& device) {
std::string model_file =
"./model/human_pp_humansegv2_lite_192x192_pretrained_3588/"
"human_pp_humansegv2_lite_192x192_pretrained_3588.";
std::string params_file;
std::string config_file =
"./model/human_pp_humansegv2_lite_192x192_pretrained_3588/deploy.yaml";
fastdeploy::RuntimeOption option = GetOption(device);
fastdeploy::ModelFormat format = GetFormat(device);
model_file = GetModelPath(model_file, device);
auto model = fastdeploy::vision::segmentation::PaddleSegModel(
model_file, params_file, config_file, option, format);
if (!model.Initialized()) {
std::cerr << "Failed to initialize." << std::endl;
return;
}
auto image_file =
"./images/portrait_heng.jpg";
auto im = cv::imread(image_file);
if (device == "npu") {
model.DisableNormalizeAndPermute();
}
fastdeploy::vision::SegmentationResult res;
clock_t start = clock();
if (!model.Predict(&im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
clock_t end = clock();
auto dur = (double)(end - start);
printf("infer_human_pp_humansegv2_lite_npu use time:%f\n",
(dur / CLOCKS_PER_SEC));
std::cout << res.Str() << std::endl;
auto vis_im = fastdeploy::vision::VisSegmentation(im, res);
cv::imwrite("human_pp_humansegv2_lite_npu_result.jpg", vis_im);
std::cout
<< "Visualized result saved in ./human_pp_humansegv2_lite_npu_result.jpg"
<< std::endl;
}

View File

@@ -0,0 +1,44 @@
# PaddleSeg Python deployment example
Before deploying, confirm the following:
- 1. The hardware and software environment meets the requirements; see [FastDeploy environment requirements](../../../../../../docs/cn/build_and_install/rknpu2.md)
[Note] If you are deploying **PP-Matting**, **PP-HumanMatting**, or **ModNet**, please refer to [Matting model deployment](../../../matting)
This directory provides `infer.py`, a quick example of deploying PPHumanseg on RKNPU. Run the following script to complete the deployment:
```bash
# Download the example code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/segmentation/paddleseg/python
# Download the model
wget https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
tar xvf human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
# Download a test image
wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip
unzip images.zip
# Run inference
python3 infer.py --model_file ./human_pp_humansegv2_lite_192x192_pretrained_3588/human_pp_humansegv2_lite_192x192_pretrained_3588.rknn \
--config_file ./human_pp_humansegv2_lite_192x192_pretrained_3588/deploy.yaml \
--image images/portrait_heng.jpg
```
After the run finishes, the visualized result looks like this:
<div align="center">
<img src="https://user-images.githubusercontent.com/16222477/191712880-91ae128d-247a-43e0-b1e3-cafae78431e0.jpg", width=512px, height=256px />
</div>
## Notes
RKNPU expects model inputs in NHWC format, and the image normalization is folded into the model when it is converted to RKNN. Therefore, when deploying with FastDeploy, call `DisableNormalizeAndPermute` (C++) or `disable_normalize_and_permute` (Python) before prediction to disable normalization and the layout conversion in the preprocessing stage.
## Other docs
- [PaddleSeg model description](..)
- [PaddleSeg C++ deployment](../cpp)
- [Prediction result description](../../../../../../docs/api/vision_results/)
- [PPSeg RKNN model conversion guide](../README.md)

View File

@@ -0,0 +1,44 @@
import fastdeploy as fd
import cv2
import os
def parse_arguments():
import argparse
import ast
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_file", required=True, help="Path of PaddleSeg model.")
parser.add_argument(
"--config_file", required=True, help="Path of PaddleSeg config.")
parser.add_argument(
"--image", type=str, required=True, help="Path of test image file.")
return parser.parse_args()
def build_option(args):
option = fd.RuntimeOption()
option.use_rknpu2()
return option
args = parse_arguments()
# Configure the runtime and load the model
runtime_option = build_option(args)
model_file = args.model_file
params_file = ""
config_file = args.config_file
model = fd.vision.segmentation.PaddleSegModel(
model_file, params_file, config_file, runtime_option=runtime_option,model_format=fd.ModelFormat.RKNN)
model.disable_normalize_and_permute()
# Predict the segmentation result for the image
im = cv2.imread(args.image)
result = model.predict(im.copy())
print(result)
# Visualize the result
vis_im = fd.vision.vis_segmentation(im, result, weight=0.5)
cv2.imwrite("vis_img.png", vis_im)

View File

@@ -0,0 +1,425 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
namespace fastdeploy {
RKNPU2Backend::~RKNPU2Backend() {
if(input_attrs != nullptr){
free(input_attrs);
}
if(output_attrs != nullptr){
free(output_attrs);
}
}
/***************************************************************
* @name GetSDKAndDeviceVersion
* @brief get RKNN sdk and device version
* @param None
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::GetSDKAndDeviceVersion() {
int ret;
// get sdk and device version
ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &sdk_ver, sizeof(sdk_ver));
if (ret != RKNN_SUCC) {
printf("rknn_query fail! ret=%d\n", ret);
return false;
}
FDINFO << "rknn_api/rknnrt version: " << sdk_ver.api_version
<< ", driver version: " << sdk_ver.drv_version << std::endl;
return true;
}
/***************************************************************
* @name BuildOption
* @brief save option
* @param RKNPU2BackendOption
* @note None
***************************************************************/
void RKNPU2Backend::BuildOption(const RKNPU2BackendOption& option) {
this->option_ = option;
// save cpu_name
this->option_.cpu_name = option.cpu_name;
// save core_mask
this->option_.core_mask = option.core_mask;
}
/***************************************************************
* @name InitFromRKNN
* @brief Initialize RKNN model
* @param model_file: Binary data for the RKNN model or the path of RKNN model.
* params_file: None
* option: config
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::InitFromRKNN(const std::string& model_file,
const RKNPU2BackendOption& option) {
// LoadModel
if (!this->LoadModel((char*)model_file.data())) {
FDERROR << "load model failed" << std::endl;
return false;
}
// GetSDKAndDeviceVersion
if (!this->GetSDKAndDeviceVersion()) {
FDERROR << "get SDK and device version failed" << std::endl;
return false;
}
// BuildOption
this->BuildOption(option);
// SetCoreMask if RK3588
if (this->option_.cpu_name == rknpu2::CpuName::RK3588) {
if (!this->SetCoreMask(option_.core_mask)) {
FDERROR << "set core mask failed" << std::endl;
return false;
}
}
// GetModelInputOutputInfos
if (!this->GetModelInputOutputInfos()) {
FDERROR << "get model input output infos failed" << std::endl;
return false;
}
return true;
}
/***************************************************************
* @name SetCoreMask
* @brief set NPU core for model
* @param core_mask: The specification of NPU core setting.
* @return bool
* @note Only support RK3588
***************************************************************/
bool RKNPU2Backend::SetCoreMask(rknpu2::CoreMask& core_mask) const {
int ret = rknn_set_core_mask(ctx, static_cast<rknn_core_mask>(core_mask));
if (ret != RKNN_SUCC) {
FDERROR << "rknn_set_core_mask fail! ret=" << ret << std::endl;
return false;
}
return true;
}
/***************************************************************
* @name LoadModel
* @brief read rknn model
* @param model: Binary data for the RKNN model or the path of RKNN model.
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::LoadModel(void* model) {
int ret = RKNN_SUCC;
ret = rknn_init(&ctx, model, 0, 0, nullptr);
if (ret != RKNN_SUCC) {
FDERROR << "rknn_init fail! ret=" << ret << std::endl;
return false;
}
return true;
}
/***************************************************************
* @name GetModelInputOutputInfos
* @brief Get the detailed input and output infos of Model
* @param None
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::GetModelInputOutputInfos() {
int ret = RKNN_SUCC;
// Get the number of model inputs and outputs
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret != RKNN_SUCC) {
return false;
}
// Get detailed input parameters
input_attrs = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_input);
memset(input_attrs, 0, io_num.n_input * sizeof(rknn_tensor_attr));
inputs_desc_.resize(io_num.n_input);
for (uint32_t i = 0; i < io_num.n_input; i++) {
input_attrs[i].index = i;
// query info
ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]),
sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC) {
printf("rknn_init error! ret=%d\n", ret);
return false;
}
std::string temp_name = input_attrs[i].name;
std::vector<int> temp_shape{};
temp_shape.resize(input_attrs[i].n_dims);
for (int j = 0; j < input_attrs[i].n_dims; j++) {
temp_shape[j] = (int)input_attrs[i].dims[j];
}
FDDataType temp_dtype =
fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
input_attrs[i].type);
TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
inputs_desc_[i] = temp_input_info;
}
// Get detailed output parameters
output_attrs =
(rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
memset(output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
outputs_desc_.resize(io_num.n_output);
for (uint32_t i = 0; i < io_num.n_output; i++) {
output_attrs[i].index = i;
// query info
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]),
sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC) {
FDERROR << "rknn_query fail! ret = " << ret << std::endl;
return false;
}
std::string temp_name = output_attrs[i].name;
std::vector<int> temp_shape{};
temp_shape.resize(output_attrs[i].n_dims);
for (int j = 0; j < output_attrs[i].n_dims; j++) {
temp_shape[j] = (int)output_attrs[i].dims[j];
}
FDDataType temp_dtype =
fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
output_attrs[i].type);
TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
outputs_desc_[i] = temp_input_info;
}
return true;
}
/***************************************************************
* @name DumpTensorAttr
* @brief Print the detailed attributes of an rknn tensor
* @param rknn_tensor_attr
* @return None
* @note None
***************************************************************/
void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
"n_elems=%d, size=%d, fmt=%s, type=%s, "
"qnt_type=%s, zp=%d, scale=%f\n",
attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
attr.dims[2], attr.dims[3], attr.n_elems, attr.size,
get_format_string(attr.fmt), get_type_string(attr.type),
get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale);
}
TensorInfo RKNPU2Backend::GetInputInfo(int index) {
FDASSERT(index < NumInputs(),
"The index: %d should less than the number of inputs: %d.", index,
NumInputs())
return inputs_desc_[index];
}
std::vector<TensorInfo> RKNPU2Backend::GetInputInfos() { return inputs_desc_; }
TensorInfo RKNPU2Backend::GetOutputInfo(int index) {
FDASSERT(index < NumOutputs(),
"The index: %d should less than the number of outputs %d.", index,
NumOutputs())
return outputs_desc_[index];
}
std::vector<TensorInfo> RKNPU2Backend::GetOutputInfos() {
return outputs_desc_;
}
bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
int ret = RKNN_SUCC;
// Judge whether the input and output size are the same
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[RKNPU2Backend] Size of the inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
// only a single input is currently supported
if (inputs.size() > 1) {
FDERROR << "[RKNPU2Backend] Size of the inputs only support 1."
<< std::endl;
return false;
}
// Judge whether the input and output types are the same
rknn_tensor_type input_type =
fastdeploy::RKNPU2Backend::FDDataTypeToRknnTensorType(inputs[0].dtype);
if (input_type != input_attrs[0].type) {
FDWARNING << "The input tensor type != model's inputs type."
<< "The input_type need " << get_type_string(input_attrs[0].type)
<< ",but inputs[0].type is " << get_type_string(input_type)
<< std::endl;
}
rknn_tensor_format input_layout =
RKNN_TENSOR_NHWC; // RK3588 only support NHWC
input_attrs[0].type = input_type;
input_attrs[0].fmt = input_layout;
input_attrs[0].size = inputs[0].Nbytes();
input_attrs[0].size_with_stride = inputs[0].Nbytes();
input_attrs[0].pass_through = 0;
// create input tensor memory
rknn_tensor_mem* input_mems[1];
input_mems[0] = rknn_create_mem(ctx, inputs[0].Nbytes());
if (input_mems[0] == nullptr) {
FDERROR << "rknn_create_mem input_mems error." << std::endl;
return false;
}
// Copy input data to input tensor memory
uint32_t width = input_attrs[0].dims[2];
uint32_t stride = input_attrs[0].w_stride;
if (width == stride) {
if (inputs[0].Data() == nullptr) {
FDERROR << "inputs[0].Data is NULL." << std::endl;
return false;
}
memcpy(input_mems[0]->virt_addr, inputs[0].Data(), inputs[0].Nbytes());
} else {
FDERROR << "[RKNPU2Backend] only support width == stride." << std::endl;
return false;
}
// Create output tensor memory
rknn_tensor_mem* output_mems[io_num.n_output];
for (uint32_t i = 0; i < io_num.n_output; ++i) {
// Most post-processing does not support the fp16 format.
// The unified output here is float32
uint32_t output_size = output_attrs[i].n_elems * sizeof(float);
output_mems[i] = rknn_create_mem(ctx, output_size);
}
// Set input tensor memory
ret = rknn_set_io_mem(ctx, input_mems[0], &input_attrs[0]);
if (ret != RKNN_SUCC) {
FDERROR << "input tensor memory rknn_set_io_mem fail! ret=" << ret
<< std::endl;
return false;
}
// Set output tensor memory
for (uint32_t i = 0; i < io_num.n_output; ++i) {
// default output type is depend on model, this requires float32 to compute top5
output_attrs[i].type = RKNN_TENSOR_FLOAT32;
ret = rknn_set_io_mem(ctx, output_mems[i], &output_attrs[i]);
// set output memory and attribute
if (ret != RKNN_SUCC) {
FDERROR << "output tensor memory rknn_set_io_mem fail! ret=" << ret
<< std::endl;
return false;
}
}
// run rknn
ret = rknn_run(ctx, nullptr);
if (ret != RKNN_SUCC) {
FDERROR << "rknn run error! ret=" << ret << std::endl;
return false;
}
rknn_destroy_mem(ctx, input_mems[0]);
// get result
outputs->resize(outputs_desc_.size());
std::vector<int64_t> temp_shape(4);
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
temp_shape.resize(outputs_desc_[i].shape.size());
for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) {
temp_shape[j] = outputs_desc_[i].shape[j];
}
(*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
outputs_desc_[i].name);
memcpy((*outputs)[i].MutableData(), (float*)output_mems[i]->virt_addr, (*outputs)[i].Nbytes());
rknn_destroy_mem(ctx, output_mems[i]);
}
return true;
}
/***************************************************************
* @name RknnTensorTypeToFDDataType
* @brief Change RknnTensorType To FDDataType
* @param rknn_tensor_type
* @return None
* @note Most post-processing does not support the fp16 format.
* Therefore, if the input is FP16, the output will be FP32.
***************************************************************/
FDDataType RKNPU2Backend::RknnTensorTypeToFDDataType(rknn_tensor_type type) {
if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT16) {
return FDDataType::FP32;
}
if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT32) {
return FDDataType::FP32;
}
if (type == rknn_tensor_type::RKNN_TENSOR_INT8) {
return FDDataType::INT8;
}
if (type == rknn_tensor_type::RKNN_TENSOR_INT16) {
return FDDataType::INT16;
}
if (type == rknn_tensor_type::RKNN_TENSOR_INT32) {
return FDDataType::INT32;
}
if (type == rknn_tensor_type::RKNN_TENSOR_UINT8) {
return FDDataType::UINT8;
}
if (type == rknn_tensor_type::RKNN_TENSOR_BOOL) {
return FDDataType::BOOL;
}
FDERROR << "FDDataType don't support this type" << std::endl;
return FDDataType::UNKNOWN1;
}
/***************************************************************
* @name FDDataTypeToRknnTensorType
* @brief Change FDDataType To RknnTensorType
* @param FDDataType
* @return None
* @note None
***************************************************************/
rknn_tensor_type
RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) {
if (type == FDDataType::FP16) {
return rknn_tensor_type::RKNN_TENSOR_FLOAT16;
}
if (type == FDDataType::FP32) {
return rknn_tensor_type::RKNN_TENSOR_FLOAT32;
}
if (type == FDDataType::INT8) {
return rknn_tensor_type::RKNN_TENSOR_INT8;
}
if (type == FDDataType::INT16) {
return rknn_tensor_type::RKNN_TENSOR_INT16;
}
if (type == FDDataType::INT32) {
return rknn_tensor_type::RKNN_TENSOR_INT32;
}
if (type == FDDataType::UINT8) {
return rknn_tensor_type::RKNN_TENSOR_UINT8;
}
if (type == FDDataType::BOOL) {
return rknn_tensor_type::RKNN_TENSOR_BOOL;
}
FDERROR << "rknn_tensor_type don't support this type" << std::endl;
return RKNN_TENSOR_TYPE_MAX;
}
} // namespace fastdeploy

View File

@@ -0,0 +1,96 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "fastdeploy/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "rknn_api.h" // NOLINT
#include "rknpu2_config.h"
#include <cstring> // for memset
#include <iostream>
#include <memory>
#include <string>
#include <vector>
namespace fastdeploy {
struct RKNPU2BackendOption {
rknpu2::CpuName cpu_name = rknpu2::CpuName::RK3588;
// The specification of the NPU core setting. It has the following choices:
// RKNN_NPU_CORE_AUTO: automatic mode, meaning that the idle core inside the
// NPU is selected.
// RKNN_NPU_CORE_0: running on the NPU0 core.
// RKNN_NPU_CORE_1: running on the NPU1 core.
// RKNN_NPU_CORE_2: running on the NPU2 core.
// RKNN_NPU_CORE_0_1: running on both the NPU0 and NPU1 cores simultaneously.
// RKNN_NPU_CORE_0_1_2: running on the NPU0, NPU1 and NPU2 cores simultaneously.
rknpu2::CoreMask core_mask = rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
};
class RKNPU2Backend : public BaseBackend {
public:
RKNPU2Backend() = default;
virtual ~RKNPU2Backend();
// RKNN API
bool LoadModel(void* model);
bool GetSDKAndDeviceVersion();
bool SetCoreMask(rknpu2::CoreMask& core_mask) const;
bool GetModelInputOutputInfos();
// BaseBackend API
void BuildOption(const RKNPU2BackendOption& option);
bool InitFromRKNN(const std::string& model_file,
const RKNPU2BackendOption& option = RKNPU2BackendOption());
int NumInputs() const override {
return static_cast<int>(inputs_desc_.size());
}
int NumOutputs() const override {
return static_cast<int>(outputs_desc_.size());
}
TensorInfo GetInputInfo(int index) override;
TensorInfo GetOutputInfo(int index) override;
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;
bool Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) override;
private:
// The object of rknn context.
rknn_context ctx{};
// The structure rknn_sdk_version is used to indicate the version information of the RKNN SDK.
rknn_sdk_version sdk_ver{};
// The structure rknn_input_output_num represents the number of input and output Tensor
rknn_input_output_num io_num{};
std::vector<TensorInfo> inputs_desc_;
std::vector<TensorInfo> outputs_desc_;
rknn_tensor_attr* input_attrs = nullptr;
rknn_tensor_attr* output_attrs = nullptr;
RKNPU2BackendOption option_;
static void DumpTensorAttr(rknn_tensor_attr& attr);
static FDDataType RknnTensorTypeToFDDataType(rknn_tensor_type type);
static rknn_tensor_type FDDataTypeToRknnTensorType(FDDataType type);
};
} // namespace fastdeploy

View File

@@ -0,0 +1,40 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef RKNPU2_CONFIG_H
#define RKNPU2_CONFIG_H
namespace fastdeploy {
namespace rknpu2 {
typedef enum _rknpu2_cpu_name {
RK356X = 0, /* run on RK356X. */
RK3588 = 1, /* default, run on RK3588. */
UNDEFINED,
} CpuName;
/*! RKNPU2 core mask for mobile device. */
typedef enum _rknpu2_core_mask {
RKNN_NPU_CORE_AUTO = 0, ///< default, run on NPU core randomly.
RKNN_NPU_CORE_0 = 1, ///< run on NPU core 0.
RKNN_NPU_CORE_1 = 2, ///< run on NPU core 1.
RKNN_NPU_CORE_2 = 4, ///< run on NPU core 2.
RKNN_NPU_CORE_0_1 =
RKNN_NPU_CORE_0 | RKNN_NPU_CORE_1, ///< run on NPU core 0 and core 1.
RKNN_NPU_CORE_0_1_2 =
RKNN_NPU_CORE_0_1 | RKNN_NPU_CORE_2, ///< run on NPU core 0, core 1 and core 2.
RKNN_NPU_CORE_UNDEFINED,
} CoreMask;
} // namespace rknpu2
} // namespace fastdeploy
#endif //RKNPU2_CONFIG_H

View File

@@ -53,6 +53,9 @@ std::string Str(const Device& d) {
case Device::GPU:
out = "Device::GPU";
break;
case Device::RKNPU:
out = "Device::RKNPU";
break;
case Device::IPU:
out = "Device::IPU";
break;
@@ -70,6 +73,9 @@ std::ostream& operator<<(std::ostream& out,const Device& d){
case Device::GPU:
out << "Device::GPU";
break;
case Device::RKNPU:
out << "Device::RKNPU";
break;
default:
out << "Device::UNKOWN";
}

View File

@@ -22,7 +22,7 @@
namespace fastdeploy {
enum FASTDEPLOY_DECL Device { CPU, GPU, IPU };
enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU};
FASTDEPLOY_DECL std::string Str(const Device& d);

View File

@@ -41,6 +41,7 @@ bool FastDeployModel::InitRuntime() {
#ifndef WITH_IPU
use_ipu = false;
#endif
bool use_rknpu = (runtime_option.device == Device::RKNPU);
// whether the model is supported by the setted backend
bool is_supported = false;
@@ -51,6 +52,13 @@ bool FastDeployModel::InitRuntime() {
break;
}
}
} else if (use_rknpu) {
for (auto& item : valid_rknpu_backends) {
if (item == runtime_option.backend) {
is_supported = true;
break;
}
}
} else if(use_ipu) {
for (auto& item : valid_ipu_backends) {
if (item == runtime_option.backend) {
@@ -101,6 +109,8 @@ bool FastDeployModel::InitRuntime() {
<< std::endl;
return false;
#endif
} else if (runtime_option.device == Device::RKNPU) {
return CreateRKNPUBackend();
} else if (runtime_option.device == Device::IPU) {
#ifdef WITH_IPU
return CreateIpuBackend();
@@ -110,7 +120,7 @@ bool FastDeployModel::InitRuntime() {
return false;
#endif
}
FDERROR << "Only support CPU/GPU now." << std::endl;
FDERROR << "Only support CPU/GPU/NPU now." << std::endl;
return false;
}
@@ -138,7 +148,7 @@ bool FastDeployModel::CreateCpuBackend() {
}
bool FastDeployModel::CreateGpuBackend() {
if (valid_gpu_backends.size() == 0) {
if (valid_gpu_backends.empty()) {
FDERROR << "There's no valid gpu backends for model: " << ModelName()
<< std::endl;
return false;
@@ -161,6 +171,30 @@ bool FastDeployModel::CreateGpuBackend() {
return false;
}
bool FastDeployModel::CreateRKNPUBackend() {
if (valid_rknpu_backends.empty()) {
FDERROR << "There's no valid npu backends for model: " << ModelName()
<< std::endl;
return false;
}
for (size_t i = 0; i < valid_rknpu_backends.size(); ++i) {
if (!IsBackendAvailable(valid_rknpu_backends[i])) {
continue;
}
runtime_option.backend = valid_rknpu_backends[i];
runtime_ = std::unique_ptr<Runtime>(new Runtime());
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Cannot find an available npu backend to load this model."
<< std::endl;
return false;
}
bool FastDeployModel::CreateIpuBackend() {
if (valid_ipu_backends.size() == 0) {
FDERROR << "There's no valid ipu backends for model: " << ModelName()

View File

@@ -38,6 +38,12 @@ class FASTDEPLOY_DECL FastDeployModel {
/** Model's valid ipu backends. This member defined all the ipu backends have successfully tested for the model
*/
std::vector<Backend> valid_ipu_backends = {Backend::PDINFER};
/** Model's valid rknpu backends. This member defines all the rknpu backends that have been successfully tested for the model
*/
std::vector<Backend> valid_rknpu_backends = {};
/// Get number of inputs for this model
virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
/// Get number of outputs for this model
@@ -99,6 +105,8 @@ class FASTDEPLOY_DECL FastDeployModel {
virtual bool CreateCpuBackend();
virtual bool CreateGpuBackend();
virtual bool CreateIpuBackend();
virtual bool CreateRKNPUBackend();
bool initialized = false;
std::vector<Backend> valid_external_backends;

View File

@@ -164,6 +164,9 @@ PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
m.def_submodule("text", "Text module of FastDeploy."); m.def_submodule("text", "Text module of FastDeploy.");
BindText(text_module); BindText(text_module);
#endif #endif
auto rknpu2_module =
m.def_submodule("rknpu2", "RKNPU2 config module of FastDeploy.");
BindRKNPU2Config(rknpu2_module);
}
} // namespace fastdeploy

View File

@@ -39,6 +39,7 @@ void BindBackend(pybind11::module&);
void BindVision(pybind11::module&);
void BindText(pybind11::module& m);
void BindPipeline(pybind11::module& m);
void BindRKNPU2Config(pybind11::module&);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);

View File

@@ -0,0 +1,33 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h"
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {
void BindRKNPU2Config(pybind11::module& m) {
pybind11::enum_<fastdeploy::rknpu2::CpuName>(m, "CpuName", pybind11::arithmetic(),
"CpuName for inference.")
.value("RK356X", fastdeploy::rknpu2::CpuName::RK356X)
.value("RK3588", fastdeploy::rknpu2::CpuName::RK3588)
.value("UNDEFINED", fastdeploy::rknpu2::CpuName::UNDEFINED);
pybind11::enum_<fastdeploy::rknpu2::CoreMask>(m, "CoreMask", pybind11::arithmetic(),
"CoreMask for inference.")
.value("RKNN_NPU_CORE_AUTO", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO)
.value("RKNN_NPU_CORE_0", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0)
.value("RKNN_NPU_CORE_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_1)
.value("RKNN_NPU_CORE_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_2)
.value("RKNN_NPU_CORE_0_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1)
.value("RKNN_NPU_CORE_0_1_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2)
.value("RKNN_NPU_CORE_UNDEFINED", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED);
}
} // namespace fastdeploy
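A hedged sketch of what these bindings expose on the Python side once the extension is built; the exact re-export path through the fastdeploy Python package is an assumption, since this diff only shows the submodule registration:
```python
import fastdeploy as fd  # assumes the compiled extension is re-exported by the Python package

# The submodule registered above is named "rknpu2" and carries the two enums.
cpu = fd.rknpu2.CpuName.RK3588                 # hypothetical access path
core = fd.rknpu2.CoreMask.RKNN_NPU_CORE_0_1_2  # core-mask values mirror the C++ enum
print(cpu, core)
```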

View File

@@ -22,6 +22,7 @@ void BindRuntime(pybind11::module& m) {
.def("set_model_path", &RuntimeOption::SetModelPath) .def("set_model_path", &RuntimeOption::SetModelPath)
.def("use_gpu", &RuntimeOption::UseGpu) .def("use_gpu", &RuntimeOption::UseGpu)
.def("use_cpu", &RuntimeOption::UseCpu) .def("use_cpu", &RuntimeOption::UseCpu)
.def("use_rknpu2", &RuntimeOption::UseRKNPU2)
.def("set_external_stream", &RuntimeOption::SetExternalStream) .def("set_external_stream", &RuntimeOption::SetExternalStream)
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
.def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
@@ -174,17 +175,20 @@ void BindRuntime(pybind11::module& m) {
.value("TRT", Backend::TRT) .value("TRT", Backend::TRT)
.value("POROS", Backend::POROS) .value("POROS", Backend::POROS)
.value("PDINFER", Backend::PDINFER) .value("PDINFER", Backend::PDINFER)
.value("RKNPU2", Backend::RKNPU2)
.value("LITE", Backend::LITE); .value("LITE", Backend::LITE);
pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(), pybind11::enum_<ModelFormat>(m, "ModelFormat", pybind11::arithmetic(),
"ModelFormat for inference.") "ModelFormat for inference.")
.value("PADDLE", ModelFormat::PADDLE) .value("PADDLE", ModelFormat::PADDLE)
.value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT) .value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT)
.value("RKNN", ModelFormat::RKNN)
.value("ONNX", ModelFormat::ONNX); .value("ONNX", ModelFormat::ONNX);
pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(), pybind11::enum_<Device>(m, "Device", pybind11::arithmetic(),
"Device for inference.") "Device for inference.")
.value("CPU", Device::CPU) .value("CPU", Device::CPU)
.value("GPU", Device::GPU) .value("GPU", Device::GPU)
.value("IPU", Device::IPU); .value("IPU", Device::IPU)
.value("RKNPU", Device::RKNPU);
pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(), pybind11::enum_<FDDataType>(m, "FDDataType", pybind11::arithmetic(),
"Data type of FastDeploy.") "Data type of FastDeploy.")

View File

@@ -41,6 +41,10 @@
#include "fastdeploy/backends/lite/lite_backend.h" #include "fastdeploy/backends/lite/lite_backend.h"
#endif #endif
#ifdef ENABLE_RKNPU2_BACKEND
#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
#endif
namespace fastdeploy { namespace fastdeploy {
std::vector<Backend> GetAvailableBackends() { std::vector<Backend> GetAvailableBackends() {
@@ -62,6 +66,9 @@ std::vector<Backend> GetAvailableBackends() {
#endif #endif
#ifdef ENABLE_LITE_BACKEND #ifdef ENABLE_LITE_BACKEND
backends.push_back(Backend::LITE); backends.push_back(Backend::LITE);
#endif
#ifdef ENABLE_RKNPU2_BACKEND
backends.push_back(Backend::RKNPU2);
#endif #endif
return backends; return backends;
} }
@@ -85,6 +92,8 @@ std::string Str(const Backend& b) {
return "Backend::PDINFER"; return "Backend::PDINFER";
} else if (b == Backend::POROS) { } else if (b == Backend::POROS) {
return "Backend::POROS"; return "Backend::POROS";
} else if (b == Backend::RKNPU2) {
return "Backend::RKNPU2";
}else if (b == Backend::OPENVINO) { }else if (b == Backend::OPENVINO) {
return "Backend::OPENVINO"; return "Backend::OPENVINO";
} else if (b == Backend::LITE) { } else if (b == Backend::LITE) {
@@ -98,6 +107,8 @@ std::string Str(const ModelFormat& f) {
return "ModelFormat::PADDLE"; return "ModelFormat::PADDLE";
} else if (f == ModelFormat::ONNX) { } else if (f == ModelFormat::ONNX) {
return "ModelFormat::ONNX"; return "ModelFormat::ONNX";
}else if (f == ModelFormat::RKNN) {
return "ModelFormat::RKNN";
} else if (f == ModelFormat::TORCHSCRIPT) { } else if (f == ModelFormat::TORCHSCRIPT) {
return "ModelFormat::TORCHSCRIPT"; return "ModelFormat::TORCHSCRIPT";
} }
@@ -113,6 +124,8 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) {
out << "Backend::PDINFER"; out << "Backend::PDINFER";
} else if (backend == Backend::OPENVINO) { } else if (backend == Backend::OPENVINO) {
out << "Backend::OPENVINO"; out << "Backend::OPENVINO";
} else if (backend == Backend::RKNPU2) {
out << "Backend::RKNPU2";
}else if (backend == Backend::POROS) { }else if (backend == Backend::POROS) {
out << "Backend::POROS"; out << "Backend::POROS";
} else if (backend == Backend::LITE) { } else if (backend == Backend::LITE) {
@@ -127,6 +140,8 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
out << "ModelFormat::PADDLE"; out << "ModelFormat::PADDLE";
} else if (format == ModelFormat::ONNX) { } else if (format == ModelFormat::ONNX) {
out << "ModelFormat::ONNX"; out << "ModelFormat::ONNX";
} else if (format == ModelFormat::RKNN) {
out << "ModelFormat::RKNN";
} else if (format == ModelFormat::TORCHSCRIPT) { } else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT"; out << "ModelFormat::TORCHSCRIPT";
} }
@@ -152,6 +167,14 @@ bool CheckModelFormat(const std::string& model_file,
<< model_file << std::endl; << model_file << std::endl;
return false; return false;
} }
} else if (model_format == ModelFormat::RKNN) {
if (model_file.size() < 5 ||
model_file.substr(model_file.size() - 5, 5) != ".rknn") {
FDERROR << "With model format of ModelFormat::RKNN, the model file "
"should ends with `.rknn`, but now it's "
<< model_file << std::endl;
return false;
}
} else if (model_format == ModelFormat::TORCHSCRIPT) { } else if (model_format == ModelFormat::TORCHSCRIPT) {
if (model_file.size() < 3 || if (model_file.size() < 3 ||
model_file.substr(model_file.size() - 3, 3) != ".pt") { model_file.substr(model_file.size() - 3, 3) != ".pt") {
@@ -162,7 +185,7 @@ bool CheckModelFormat(const std::string& model_file,
} }
} else { } else {
FDERROR << "Only support model format with frontend ModelFormat::PADDLE / " FDERROR << "Only support model format with frontend ModelFormat::PADDLE / "
"ModelFormat::ONNX / ModelFormat::TORCHSCRIPT." "ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT."
<< std::endl; << std::endl;
return false; return false;
} }
@@ -182,6 +205,10 @@ ModelFormat GuessModelFormat(const std::string& model_file) {
model_file.substr(model_file.size() - 3, 3) == ".pt") { model_file.substr(model_file.size() - 3, 3) == ".pt") {
FDINFO << "Model Format: Torchscript." << std::endl; FDINFO << "Model Format: Torchscript." << std::endl;
return ModelFormat::TORCHSCRIPT; return ModelFormat::TORCHSCRIPT;
} else if (model_file.size() > 5 &&
model_file.substr(model_file.size() - 5, 5) == ".rknn") {
FDINFO << "Model Format: RKNN." << std::endl;
return ModelFormat::RKNN;
} }
FDERROR << "Cannot guess which model format you are using, please set " FDERROR << "Cannot guess which model format you are using, please set "
@@ -223,6 +250,13 @@ void RuntimeOption::UseGpu(int gpu_id) {
void RuntimeOption::UseCpu() { device = Device::CPU; } void RuntimeOption::UseCpu() { device = Device::CPU; }
void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
fastdeploy::rknpu2::CoreMask rknpu2_core) {
rknpu2_cpu_name_ = rknpu2_name;
rknpu2_core_mask_ = rknpu2_core;
device = Device::RKNPU;
}
void RuntimeOption::SetExternalStream(void* external_stream) { void RuntimeOption::SetExternalStream(void* external_stream) {
external_stream_ = external_stream; external_stream_ = external_stream;
} }
@@ -234,7 +268,8 @@ void RuntimeOption::SetCpuThreadNum(int thread_num) {
void RuntimeOption::SetOrtGraphOptLevel(int level) { void RuntimeOption::SetOrtGraphOptLevel(int level) {
std::vector<int> supported_level{-1, 0, 1, 2}; std::vector<int> supported_level{-1, 0, 1, 2};
auto valid_level = std::find(supported_level.begin(), supported_level.end(), level) != supported_level.end(); auto valid_level = std::find(supported_level.begin(), supported_level.end(),
level) != supported_level.end();
FDASSERT(valid_level, "The level must be -1, 0, 1, 2."); FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
ort_graph_opt_level = level; ort_graph_opt_level = level;
} }
@@ -324,7 +359,6 @@ void RuntimeOption::EnableLiteFP16() {
void RuntimeOption::DisableLiteFP16() { void RuntimeOption::DisableLiteFP16() {
lite_enable_fp16 = false; lite_enable_fp16 = false;
} }
void RuntimeOption::EnableLiteInt8() { void RuntimeOption::EnableLiteInt8() {
lite_enable_int8 = true; lite_enable_int8 = true;
} }
@@ -332,7 +366,6 @@ void RuntimeOption::EnableLiteInt8() {
void RuntimeOption::DisableLiteInt8() { void RuntimeOption::DisableLiteInt8() {
lite_enable_int8 = false; lite_enable_int8 = false;
} }
void RuntimeOption::SetLitePowerMode(LitePowerMode mode) { void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
lite_power_mode = mode; lite_power_mode = mode;
} }
@@ -454,6 +487,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
option.backend = Backend::POROS; option.backend = Backend::POROS;
} else if (IsBackendAvailable(Backend::OPENVINO)) { } else if (IsBackendAvailable(Backend::OPENVINO)) {
option.backend = Backend::OPENVINO; option.backend = Backend::OPENVINO;
} else if (IsBackendAvailable(Backend::RKNPU2)) {
option.backend = Backend::RKNPU2;
} else { } else {
FDERROR << "Please define backend in RuntimeOption, current it's " FDERROR << "Please define backend in RuntimeOption, current it's "
"Backend::UNKNOWN." "Backend::UNKNOWN."
@@ -506,6 +541,13 @@ bool Runtime::Init(const RuntimeOption& _option) {
CreateLiteBackend(); CreateLiteBackend();
FDINFO << "Runtime initialized with Backend::LITE in " << Str(option.device) FDINFO << "Runtime initialized with Backend::LITE in " << Str(option.device)
<< "." << std::endl; << "." << std::endl;
} else if (option.backend == Backend::RKNPU2) {
FDASSERT(option.device == Device::RKNPU,
"Backend::RKNPU2 only supports Device::RKNPU2");
CreateRKNPU2Backend();
FDINFO << "Runtime initialized with Backend::RKNPU2 in "
<< Str(option.device) << "." << std::endl;
} else { } else {
FDERROR << "Runtime only support " FDERROR << "Runtime only support "
"Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as " "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
@@ -720,4 +762,21 @@ void Runtime::CreateLiteBackend() {
#endif #endif
} }
void Runtime::CreateRKNPU2Backend() {
#ifdef ENABLE_RKNPU2_BACKEND
auto rknpu2_option = RKNPU2BackendOption();
rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
rknpu2_option.core_mask = option.rknpu2_core_mask_;
FDASSERT(option.model_format == ModelFormat::RKNN,
"RKNPU2Backend only support model format of ModelFormat::RKNN");
backend_ = utils::make_unique<RKNPU2Backend>();
auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
"Load model from nb file failed while initializing LiteBackend.");
#else
FDASSERT(false, "RKNPU2Backend is not available, please compiled with "
"ENABLE_RKNPU2_BACKEND=ON.");
#endif
}
} // namespace fastdeploy } // namespace fastdeploy

View File

@@ -20,12 +20,13 @@
#pragma once #pragma once
#include <algorithm>
#include <map> #include <map>
#include <vector> #include <vector>
#include <algorithm>
#include "fastdeploy/backends/backend.h" #include "fastdeploy/backends/backend.h"
#include "fastdeploy/utils/perf.h" #include "fastdeploy/utils/perf.h"
#include "backends/rknpu/rknpu2/rknpu2_config.h"
/** \brief All C++ FastDeploy APIs are defined inside this namespace /** \brief All C++ FastDeploy APIs are defined inside this namespace
* *
@@ -41,6 +42,7 @@ enum Backend {
POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU POROS, ///< Poros, support TorchScript format model, CPU / Nvidia GPU
OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only LITE, ///< Paddle Lite, support Paddle format model, ARM CPU only
RKNPU2, ///< RKNPU2, support RKNN format model, Rockchip NPU only
}; };
/*! Deep learning model format */ /*! Deep learning model format */
@@ -48,6 +50,7 @@ enum ModelFormat {
AUTOREC, ///< Auto recognize the model format by model file name AUTOREC, ///< Auto recognize the model format by model file name
PADDLE, ///< Model with paddlepaddle format PADDLE, ///< Model with paddlepaddle format
ONNX, ///< Model with ONNX format ONNX, ///< Model with ONNX format
RKNN, ///< Model with RKNN format
TORCHSCRIPT, ///< Model with TorchScript format TORCHSCRIPT, ///< Model with TorchScript format
}; };
@@ -102,6 +105,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
/// Use Nvidia GPU to inference /// Use Nvidia GPU to inference
void UseGpu(int gpu_id = 0); void UseGpu(int gpu_id = 0);
void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name = fastdeploy::rknpu2::CpuName::RK3588,
fastdeploy::rknpu2::CoreMask rknpu2_core = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
void SetExternalStream(void* external_stream); void SetExternalStream(void* external_stream);
/* /*
@@ -325,6 +331,10 @@ struct FASTDEPLOY_DECL RuntimeOption {
int unconst_ops_thres = -1; int unconst_ops_thres = -1;
std::string poros_file = ""; std::string poros_file = "";
// ======Only for RKNPU2 Backend=======
fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ = fastdeploy::rknpu2::CpuName::RK3588;
fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
std::string model_file = ""; // Path of model file std::string model_file = ""; // Path of model file
std::string params_file = ""; // Path of parameters file, can be empty std::string params_file = ""; // Path of parameters file, can be empty
ModelFormat model_format = ModelFormat::AUTOREC; // format of input model ModelFormat model_format = ModelFormat::AUTOREC; // format of input model
@@ -388,6 +398,7 @@ struct FASTDEPLOY_DECL Runtime {
void CreateTrtBackend(); void CreateTrtBackend();
void CreateOpenVINOBackend(); void CreateOpenVINOBackend();
void CreateLiteBackend(); void CreateLiteBackend();
void CreateRKNPU2Backend();
std::unique_ptr<BaseBackend> backend_; std::unique_ptr<BaseBackend> backend_;
}; };
} // namespace fastdeploy } // namespace fastdeploy

View File

@@ -28,6 +28,7 @@ PaddleSegModel::PaddleSegModel(const std::string& model_file,
config_file_ = config_file; config_file_ = config_file;
valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, Backend::LITE}; valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, Backend::LITE};
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT}; valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
valid_rknpu_backends = {Backend::RKNPU2};
runtime_option = custom_option; runtime_option = custom_option;
runtime_option.model_format = model_format; runtime_option.model_format = model_format;
runtime_option.model_file = model_file; runtime_option.model_file = model_file;
@@ -67,6 +68,7 @@ bool PaddleSegModel::BuildPreprocessPipelineFromConfig() {
FDASSERT(op.IsMap(), FDASSERT(op.IsMap(),
"Require the transform information in yaml be Map type."); "Require the transform information in yaml be Map type.");
if (op["type"].as<std::string>() == "Normalize") { if (op["type"].as<std::string>() == "Normalize") {
if(!(this->disable_normalize_and_permute)){
std::vector<float> mean = {0.5, 0.5, 0.5}; std::vector<float> mean = {0.5, 0.5, 0.5};
std::vector<float> std = {0.5, 0.5, 0.5}; std::vector<float> std = {0.5, 0.5, 0.5};
if (op["mean"]) { if (op["mean"]) {
@@ -76,7 +78,7 @@ bool PaddleSegModel::BuildPreprocessPipelineFromConfig() {
std = op["std"].as<std::vector<float>>(); std = op["std"].as<std::vector<float>>();
} }
processors_.push_back(std::make_shared<Normalize>(mean, std)); processors_.push_back(std::make_shared<Normalize>(mean, std));
}
} else if (op["type"].as<std::string>() == "Resize") { } else if (op["type"].as<std::string>() == "Resize") {
yml_contain_resize_op = true; yml_contain_resize_op = true;
const auto& target_size = op["target_size"]; const auto& target_size = op["target_size"];
@@ -130,7 +132,9 @@ bool PaddleSegModel::BuildPreprocessPipelineFromConfig() {
<< "." << std::endl; << "." << std::endl;
} }
} }
if(!(this->disable_normalize_and_permute)){
processors_.push_back(std::make_shared<HWC2CHW>()); processors_.push_back(std::make_shared<HWC2CHW>());
}
return true; return true;
} }
@@ -357,6 +361,14 @@ bool PaddleSegModel::Predict(cv::Mat* im, SegmentationResult* result) {
return true; return true;
} }
void PaddleSegModel::DisableNormalizeAndPermute(){
this->disable_normalize_and_permute = true;
// Rebuild the preprocess pipeline; otherwise this switch has no effect once the configuration file has already been parsed during initialization.
if (!BuildPreprocessPipelineFromConfig()) {
FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl;
}
}
} // namespace segmentation } // namespace segmentation
} // namespace vision } // namespace vision
} // namespace fastdeploy } // namespace fastdeploy
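Context for this switch, as I read the change: when a model is exported with the rknpu2 export tool added below, the mean/std normalization is baked into the .rknn file (via mean_values/std_values) and the NPU consumes NHWC input directly, so the host-side Normalize and HWC2CHW steps become redundant. DisableNormalizeAndPermute skips them and rebuilds the pipeline so the switch also takes effect after the configuration file has been parsed.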

View File

@@ -60,6 +60,9 @@ class FASTDEPLOY_DECL PaddleSegModel : public FastDeployModel {
*/ */
bool is_vertical_screen = false; bool is_vertical_screen = false;
// Disable the Normalize and HWC2CHW steps in the preprocessing pipeline.
void DisableNormalizeAndPermute();
private: private:
bool Initialize(); bool Initialize();
@@ -76,6 +79,9 @@ class FASTDEPLOY_DECL PaddleSegModel : public FastDeployModel {
std::vector<std::shared_ptr<Processor>> processors_; std::vector<std::shared_ptr<Processor>> processors_;
std::string config_file_; std::string config_file_;
// Records whether the Normalize and HWC2CHW preprocessing steps are disabled.
bool disable_normalize_and_permute = false;
}; };
} // namespace segmentation } // namespace segmentation

View File

@@ -27,6 +27,7 @@ void BindPPSeg(pybind11::module& m) {
self.Predict(&mat, res); self.Predict(&mat, res);
return res; return res;
}) })
.def("disable_normalize_and_permute",&vision::segmentation::PaddleSegModel::DisableNormalizeAndPermute)
.def_readwrite("apply_softmax", .def_readwrite("apply_softmax",
&vision::segmentation::PaddleSegModel::apply_softmax) &vision::segmentation::PaddleSegModel::apply_softmax)
.def_readwrite("is_vertical_screen", .def_readwrite("is_vertical_screen",

View File

@@ -16,10 +16,11 @@ import logging
import os import os
import sys import sys
from .c_lib_wrap import (ModelFormat, Backend, FDDataType, TensorInfo, Device, from .c_lib_wrap import (ModelFormat, Backend, rknpu2,
FDDataType, TensorInfo, Device,
FDTensor, is_built_with_gpu, is_built_with_ort, FDTensor, is_built_with_gpu, is_built_with_ort,
ModelFormat, is_built_with_paddle, is_built_with_trt, ModelFormat, is_built_with_paddle, is_built_with_trt,
get_default_cuda_directory) get_default_cuda_directory, )
from .runtime import Runtime, RuntimeOption from .runtime import Runtime, RuntimeOption
from .model import FastDeployModel from .model import FastDeployModel

View File

@@ -16,7 +16,7 @@ import logging
import numpy as np import numpy as np
from . import ModelFormat from . import ModelFormat
from . import c_lib_wrap as C from . import c_lib_wrap as C
from . import rknpu2
class Runtime: class Runtime:
"""FastDeploy Runtime object. """FastDeploy Runtime object.
@@ -207,6 +207,11 @@ class RuntimeOption:
""" """
return self._option.use_cpu() return self._option.use_cpu()
def use_rknpu2(self, rknpu2_name=rknpu2.CpuName.RK3588, rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0):
"""Inference with RKNPU2 (Rockchip NPU)
"""
return self._option.use_rknpu2(rknpu2_name, rknpu2_core)
def set_cpu_thread_num(self, thread_num=-1): def set_cpu_thread_num(self, thread_num=-1):
"""Set number of threads if inference with CPU """Set number of threads if inference with CPU

View File

@@ -35,7 +35,7 @@ class PaddleSegModel(FastDeployModel):
""" """
super(PaddleSegModel, self).__init__(runtime_option) super(PaddleSegModel, self).__init__(runtime_option)
assert model_format == ModelFormat.PADDLE, "PaddleSeg only support model format of ModelFormat.Paddle now." # assert model_format == ModelFormat.PADDLE, "PaddleSeg only support model format of ModelFormat.Paddle now."
self._model = C.vision.segmentation.PaddleSegModel( self._model = C.vision.segmentation.PaddleSegModel(
model_file, params_file, config_file, self._runtime_option, model_file, params_file, config_file, self._runtime_option,
model_format) model_format)
@@ -49,6 +49,9 @@ class PaddleSegModel(FastDeployModel):
""" """
return self._model.predict(input_image) return self._model.predict(input_image)
def disable_normalize_and_permute(self):
return self._model.disable_normalize_and_permute()
@property @property
def apply_softmax(self): def apply_softmax(self):
"""Atrribute of PaddleSeg model. Stating Whether applying softmax operator in the postprocess, default value is False """Atrribute of PaddleSeg model. Stating Whether applying softmax operator in the postprocess, default value is False

View File

@@ -48,6 +48,8 @@ with open(os.path.join(TOP_DIR, "python", "requirements.txt")) as fin:
setup_configs = dict() setup_configs = dict()
setup_configs["ENABLE_PADDLE_FRONTEND"] = os.getenv("ENABLE_PADDLE_FRONTEND", setup_configs["ENABLE_PADDLE_FRONTEND"] = os.getenv("ENABLE_PADDLE_FRONTEND",
"ON") "ON")
setup_configs["ENABLE_RKNPU2_BACKEND"] = os.getenv("ENABLE_RKNPU2_BACKEND",
"OFF")
setup_configs["ENABLE_ORT_BACKEND"] = os.getenv("ENABLE_ORT_BACKEND", "OFF") setup_configs["ENABLE_ORT_BACKEND"] = os.getenv("ENABLE_ORT_BACKEND", "OFF")
setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND", setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND",
"OFF") "OFF")
@@ -69,6 +71,7 @@ setup_configs["LIBRARY_NAME"] = PACKAGE_NAME
setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main" setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main"
setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "") setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "")
setup_configs["ORT_DIRECTORY"] = os.getenv("ORT_DIRECTORY", "") setup_configs["ORT_DIRECTORY"] = os.getenv("ORT_DIRECTORY", "")
setup_configs["RKNN2_TARGET_SOC"] = os.getenv("RKNN2_TARGET_SOC", "")
if setup_configs["WITH_GPU"] == "ON" or setup_configs[ if setup_configs["WITH_GPU"] == "ON" or setup_configs[
"BUILD_ON_JETSON"] == "ON": "BUILD_ON_JETSON"] == "ON":

View File

@@ -0,0 +1,7 @@
model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
output_folder: ./
target_platform: RK3588
normalize:
mean: [0.5,0.5,0.5]
std: [0.5,0.5,0.5]
outputs: None
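For orientation (the keys map onto the export script that follows): model_path is the source ONNX model, output_folder is where the converted .rknn file is written, target_platform is forwarded to rknn-toolkit2's config() and also ends up in the output file name, the normalize mean/std are multiplied by 255 inside the script before being passed on, and outputs: None loads the full ONNX graph (any other value is forwarded as the output node list).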

tools/rknpu2/export.py (new file)
View File

@@ -0,0 +1,75 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import argparse
from rknn.api import RKNN
def get_config():
parser = argparse.ArgumentParser()
parser.add_argument("--verbose", default=True, help="rknntoolkit verbose")
parser.add_argument("--config_path")
args = parser.parse_args()
return args
if __name__ == "__main__":
config = get_config()
with open(config.config_path) as file:
file_data = file.read()
yaml_config = yaml.safe_load(file_data)
print(yaml_config)
model = RKNN(config.verbose)
# Config
mean_values = [[255 * mean for mean in yaml_config["normalize"]["mean"]]]
std_values = [[255 * std for std in yaml_config["normalize"]["std"]]]
model.config(mean_values=mean_values,
std_values=std_values,
target_platform=yaml_config["target_platform"])
# Load ONNX model
print(type(yaml_config["outputs"]))
print("yaml_config[\"outputs\"] = ", yaml_config["outputs"])
if yaml_config["outputs"] == "None":
ret = model.load_onnx(model=yaml_config["model_path"])
else:
ret = model.load_onnx(model=yaml_config["model_path"],
outputs=yaml_config["outputs"])
assert ret == 0, "Load model failed!"
# Build model
ret = model.build(do_quantization=None)
assert ret == 0, "Build model failed!"
# Init Runtime
ret = model.init_runtime()
assert ret == 0, "Init runtime environment failed!"
# Export
if not os.path.exists(yaml_config["output_folder"]):
os.mkdir(yaml_config["output_folder"])
model_base_name = os.path.basename(yaml_config["model_path"]).split(".")[0]
model_device_name = yaml_config["target_platform"].lower()
model_save_name = model_base_name + "_" + model_device_name + ".rknn"
ret = model.export_rknn(
os.path.join(yaml_config["output_folder"], model_save_name))
assert ret == 0, "Export rknn model failed!"
print("Export OK!")