From 4ffcfbe7260e15b19ca6871126f5e6e4e3b94ee2 Mon Sep 17 00:00:00 2001
From: Zheng_Bicheng <58363586+Zheng-Bicheng@users.noreply.github.com>
Date: Tue, 1 Nov 2022 11:14:05 +0800
Subject: [PATCH] [Backend] Add RKNPU2 backend support (#456)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* 10-29/14:05
  * Added the cmake files
  * Added the rknpu2 backend
* 10-29/14:43
  * Added the RKNPU device to the Runtime fd_type definitions
* 10-29/15:02
  * Added the ppseg RKNPU2 inference code
* 10-29/15:46
  * Added the ppseg RKNPU2 cpp example code
* 10-29/15:51
  * Added the README document
* 10-29/15:51
  * Revised some comments and variable names as requested
* 10-29/15:51
  * Fixed a bug where some code in the .cc files still used the old function names after the rename
* 10-29/22:32
  * str(Device::NPU) now prints NPU instead of UNKOWN
  * Reformatted the comments in the runtime file
  * Added ENABLE_RKNPU2_BACKEND to the Building Summary output
  * Added rknpu2 support to pybind
  * Added the Python build option
  * Added the PPSeg Python code
  * Added and updated various documents
* 10-30/14:11
  * Attempted to fix the errors raised when building with CUDA
* 10-30/19:27
  * Moved CpuName and CoreMask up a level
  * Adjusted the level of the ppseg rknn inference code
  * Images are now downloaded from the network
* 10-30/19:39
  * Updated the documents
* 10-30/19:39
  * Updated the documents
  * Updated the function naming in the ppseg rknpu2 example
  * Merged the ppseg rknpu2 example into a single .cc file
  * Fixed a logic error in the disable_normalize_and_permute code
  * Removed unused parameters from the rknpu2 initialization
* 10-30/19:39
  * Attempted to reset the Python code
* 10-30/10:16
  * rknpu2_config.h no longer includes the rknn_api header, preventing import errors
* 10-31/14:31
  * Updated pybind to support the latest rknpu2 backends
  * Restored ppseg Python inference support
  * Moved cpuname and coremask up a level
* 10-31/15:35
  * Attempted to fix the rknpu2 import error
* 10-31/19:00
  * Added the RKNPU2 model export code and its documentation
  * Fixed a large number of documentation errors
* 10-31/19:00
  * RKNN2_TARGET_SOC no longer needs to be set again after the fastdeploy repo has been built
* 10-31/19:26
  * Fixed some incorrect documents
* 10-31/19:26
  * Restored parts that were deleted by mistake
  * Fixed various incorrect documents
  * Fixed the misleading message FastDeploy.cmake prints when RKNN2_TARGET_SOC is set incorrectly
  * Removed the Chinese comments left in rknpu2_backend.cc
* 10-31/20:45
  * Removed useless comments
* 10-31/20:45
  * Renamed Device::NPU to Device::RKNPU as requested; the hardware now shares valid_hardware_backends
  * Removed useless comments and debug code
* 11-01/09:45
  * Updated the variable naming
* 11-01/10:16
  * Revised some documents and the function naming

Co-authored-by: Jason
---
 CMakeLists.txt                                |  11 +-
 FastDeploy.cmake.in                           |  15 +
 cmake/rknpu2.cmake                            |  26 ++
 cmake/summary.cmake                           |   1 +
 docs/cn/build_and_install/README.md           |  32 +-
 docs/cn/build_and_install/rknpu2.md           | 102 +++++
 docs/cn/faq/rknpu2/export.md                  |  48 ++
 docs/cn/faq/rknpu2/install_rknn_toolkit2.md   |  49 ++
 docs/cn/faq/rknpu2/rknpu2.md                  |  64 +++
 .../segmentation/paddleseg/rknpu2/README.md   |  52 +++
 .../paddleseg/rknpu2/cpp/CMakeLists.txt       |  36 ++
 .../paddleseg/rknpu2/cpp/README.md            |  84 ++++
 .../paddleseg/rknpu2/cpp/infer.cc             |  84 ++++
 .../paddleseg/rknpu2/python/README.md         |  44 ++
 .../paddleseg/rknpu2/python/infer.py          |  44 ++
 .../backends/rknpu/rknpu2/rknpu2_backend.cc   | 425 ++++++++++++++++++
 .../backends/rknpu/rknpu2/rknpu2_backend.h    |  96 ++++
 .../backends/rknpu/rknpu2/rknpu2_config.h     |  40 ++
 fastdeploy/core/fd_type.cc                    |   6 +
 fastdeploy/core/fd_type.h                     |   2 +-
 fastdeploy/fastdeploy_model.cc                |  38 +-
 fastdeploy/fastdeploy_model.h                 |   8 +
 fastdeploy/pybind/main.cc.in                  |   3 +
 fastdeploy/pybind/main.h                      |   1 +
 fastdeploy/pybind/rknpu2_config_pybind.cc     |  33 ++
 fastdeploy/pybind/runtime.cc                  |   6 +-
 fastdeploy/runtime.cc                         |  81 +++-
 fastdeploy/runtime.h                          |  67 +--
 fastdeploy/vision/segmentation/ppseg/model.cc |  34 +-
 fastdeploy/vision/segmentation/ppseg/model.h  |   6 +
 .../vision/segmentation/ppseg/ppseg_pybind.cc |   1 +
 python/fastdeploy/__init__.py                 |   5 +-
 python/fastdeploy/runtime.py                  |   7 +-
 .../vision/segmentation/ppseg/__init__.py     |   5 +-
 python/setup.py                               |   3 +
 tools/rknpu2/config/ppseg_config.yaml         |   7 +
 tools/rknpu2/export.py                        |  75 ++++
 37 files changed, 1567 insertions(+), 74 deletions(-)
 create mode 100644 cmake/rknpu2.cmake
 create mode 100644 docs/cn/build_and_install/rknpu2.md
 create mode 100644 docs/cn/faq/rknpu2/export.md
 create mode 100644 docs/cn/faq/rknpu2/install_rknn_toolkit2.md
 create mode 100644 docs/cn/faq/rknpu2/rknpu2.md
 create mode 100644
examples/vision/segmentation/paddleseg/rknpu2/README.md create mode 100644 examples/vision/segmentation/paddleseg/rknpu2/cpp/CMakeLists.txt create mode 100644 examples/vision/segmentation/paddleseg/rknpu2/cpp/README.md create mode 100644 examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc create mode 100644 examples/vision/segmentation/paddleseg/rknpu2/python/README.md create mode 100644 examples/vision/segmentation/paddleseg/rknpu2/python/infer.py create mode 100644 fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc create mode 100644 fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h create mode 100644 fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h create mode 100644 fastdeploy/pybind/rknpu2_config_pybind.cc create mode 100644 tools/rknpu2/config/ppseg_config.yaml create mode 100644 tools/rknpu2/export.py diff --git a/CMakeLists.txt b/CMakeLists.txt index ded88c54e..ce6b1920d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,6 +58,7 @@ option(ENABLE_TRT_BACKEND "Whether to enable tensorrt backend." OFF) option(ENABLE_PADDLE_BACKEND "Whether to enable paddle backend." OFF) option(ENABLE_POROS_BACKEND "Whether to enable poros backend." OFF) option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF) +option(ENABLE_RKNPU2_BACKEND "Whether to enable RKNPU2 backend." OFF) option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF) option(ENABLE_VISION "Whether to enable vision models usage." OFF) option(ENABLE_TEXT "Whether to enable text models usage." OFF) @@ -164,13 +165,14 @@ file(GLOB_RECURSE DEPLOY_PADDLE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fas file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/poros/*.cc) file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/tensorrt/*.cpp) file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/openvino/*.cc) +file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/rknpu/rknpu2/*.cc) file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/backends/lite/*.cc) file(GLOB_RECURSE DEPLOY_VISION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cc) file(GLOB_RECURSE DEPLOY_PIPELINE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pipeline/*.cc) file(GLOB_RECURSE DEPLOY_VISION_CUDA_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/vision/*.cu) file(GLOB_RECURSE DEPLOY_TEXT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/text/*.cc) file(GLOB_RECURSE DEPLOY_PYBIND_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/*_pybind.cc) -list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${DEPLOY_PIPELINE_SRCS}) +list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS} ${DEPLOY_POROS_SRCS} ${DEPLOY_TRT_SRCS} ${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS} ${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS} ${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS}) set(DEPEND_LIBS "") @@ -227,6 +229,13 @@ if(ENABLE_OPENVINO_BACKEND) include(${PROJECT_SOURCE_DIR}/cmake/openvino.cmake) endif() +if(ENABLE_RKNPU2_BACKEND) + add_definitions(-DENABLE_RKNPU2_BACKEND) + list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_RKNPU2_SRCS}) + 
include(${PROJECT_SOURCE_DIR}/cmake/rknpu2.cmake)
+  list(APPEND DEPEND_LIBS ${RKNN_RT_LIB})
+endif()
+
 if(ENABLE_POROS_BACKEND)
   set(CMAKE_CXX_STANDARD 14)
   add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
diff --git a/FastDeploy.cmake.in b/FastDeploy.cmake.in
index e1a1cf2f4..1074c86ad 100755
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -2,6 +2,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.8)
 
 set(WITH_GPU @WITH_GPU@)
 set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
+set(ENABLE_RKNPU2_BACKEND @ENABLE_RKNPU2_BACKEND@)
 set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@)
 set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
 set(ENABLE_OPENVINO_BACKEND @ENABLE_OPENVINO_BACKEND@)
@@ -27,6 +28,7 @@ set(LIBRARY_NAME @LIBRARY_NAME@)
 set(OPENCV_DIRECTORY "@OPENCV_DIRECTORY@")
 set(ORT_DIRECTORY "@ORT_DIRECTORY@")
 set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
+set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
 
 set(FASTDEPLOY_LIBS "")
 set(FASTDEPLOY_INCS "")
@@ -88,6 +90,18 @@ if(ENABLE_OPENVINO_BACKEND)
   list(APPEND FASTDEPLOY_LIBS ${OPENVINO_LIBS})
 endif()
 
+if(ENABLE_RKNPU2_BACKEND)
+  if(RKNN2_TARGET_SOC STREQUAL "RK356X")
+    set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK356X/lib/librknn_api.so)
+  elseif (RKNN2_TARGET_SOC STREQUAL "RK3588")
+    set(RKNPU2_LIB ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/librknn_api.so)
+  else ()
+    message(FATAL_ERROR "RKNN2_TARGET_SOC is not set correctly; expected RK356X or RK3588.")
+  endif()
+  message(STATUS "The path of RKNPU2 is ${RKNPU2_LIB}.")
+  list(APPEND FASTDEPLOY_LIBS ${RKNPU2_LIB})
+endif()
+
 if(ENABLE_LITE_BACKEND)
   set(LITE_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${PADDLELITE_FILENAME})
   if(ANDROID)
@@ -234,6 +248,7 @@ message(STATUS "  C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
 message(STATUS "  CXX flags : ${CMAKE_CXX_FLAGS}")
 message(STATUS "  WITH_GPU : ${WITH_GPU}")
 message(STATUS "  ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
+message(STATUS "  ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}")
 message(STATUS "  ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
 message(STATUS "  ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
 message(STATUS "  ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
diff --git a/cmake/rknpu2.cmake b/cmake/rknpu2.cmake
new file mode 100644
index 000000000..e8ed424be
--- /dev/null
+++ b/cmake/rknpu2.cmake
@@ -0,0 +1,26 @@
+# get RKNPU2_URL
+set(RKNPU2_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
+set(RKNPU2_VERSION "1.4.0")
+set(RKNPU2_FILE "rknpu2_runtime-linux-x64-${RKNPU2_VERSION}.tgz")
+set(RKNPU2_URL "${RKNPU2_URL_BASE}${RKNPU2_FILE}")
+
+# download_and_decompress
+download_and_decompress(${RKNPU2_URL} ${CMAKE_CURRENT_BINARY_DIR}/${RKNPU2_FILE} ${THIRD_PARTY_PATH}/install/)
+
+# set path
+set(RKNPU_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/rknpu2_runtime)
+
+if (NOT CMAKE_SYSTEM_NAME STREQUAL "Linux")
+  message(FATAL_ERROR "[rknpu2.cmake] The RKNPU2 backend can only be built on Linux.")
+endif ()
+
+if (EXISTS ${RKNPU_RUNTIME_PATH})
+  set(RKNN_RT_LIB ${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/lib/librknnrt.so)
+  include_directories(${RKNPU_RUNTIME_PATH}/${RKNN2_TARGET_SOC}/include)
+else ()
+  message(FATAL_ERROR "[rknpu2.cmake] Failed to download_and_decompress rknpu2_runtime.")
+endif ()
diff --git a/cmake/summary.cmake b/cmake/summary.cmake
index fb4c534c6..9a545441f 100755
--- a/cmake/summary.cmake
+++ b/cmake/summary.cmake
@@ -31,6 +31,7 @@ function(fastdeploy_summary)
   message(STATUS "  FastDeploy version : ${FASTDEPLOY_VERSION}")
   message(STATUS "  Paddle2ONNX version
: ${PADDLE2ONNX_VERSION}") message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}") + message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}") message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}") message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}") message(STATUS " ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}") diff --git a/docs/cn/build_and_install/README.md b/docs/cn/build_and_install/README.md index 1b4bc21bb..43b98602d 100644 --- a/docs/cn/build_and_install/README.md +++ b/docs/cn/build_and_install/README.md @@ -9,18 +9,20 @@ ## FastDeploy编译选项说明 -| 选项 | 说明 | -| :--- | :---- | -| ENABLE_ORT_BACKEND | 默认OFF, 是否编译集成ONNX Runtime后端(CPU/GPU上推荐打开) | -| ENABLE_PADDLE_BACKEND | 默认OFF,是否编译集成Paddle Inference后端(CPU/GPU上推荐打开) | -| ENABLE_LITE_BACKEND | 默认OFF,是否编译集成Paddle Lite后端(编译Android库时需要设置为ON) | -| ENABLE_TRT_BACKEND | 默认OFF,是否编译集成TensorRT后端(GPU上推荐打开) | -| ENABLE_OPENVINO_BACKEND | 默认OFF,是否编译集成OpenVINO后端(CPU上推荐打开) | -| ENABLE_VISION | 默认OFF,是否编译集成视觉模型的部署模块 | -| ENABLE_TEXT | 默认OFF,是否编译集成文本NLP模型的部署模块 | -| WITH_GPU | 默认OFF, 当需要在GPU上部署时,需设置为ON | -| CUDA_DIRECTORY | 默认/usr/local/cuda, 当需要在GPU上部署时,用于指定CUDA(>=11.2)的路径 | -| TRT_DIRECTORY | 当开启TensorRT后端时,必须通过此开关指定TensorRT(>=8.4)的路径 | -| ORT_DIRECTORY | 当开启ONNX Runtime后端时,用于指定用户本地的ONNX Runtime库路径;如果不指定,编译过程会自动下载ONNX Runtime库 | -| OPENCV_DIRECTORY | 当ENABLE_VISION=ON时,用于指定用户本地的OpenCV库路径;如果不指定,编译过程会自动下载OpenCV库 | -| OPENVINO_DIRECTORY | 当开启OpenVINO后端时, 用于指定用户本地的OpenVINO库路径;如果不指定,编译过程会自动下载OpenVINO库 | +| 选项 | 说明 | +|:------------------------|:--------------------------------------------------------------------------| +| ENABLE_ORT_BACKEND | 默认OFF, 是否编译集成ONNX Runtime后端(CPU/GPU上推荐打开) | +| ENABLE_PADDLE_BACKEND | 默认OFF,是否编译集成Paddle Inference后端(CPU/GPU上推荐打开) | +| ENABLE_LITE_BACKEND | 默认OFF,是否编译集成Paddle Lite后端(编译Android库时需要设置为ON) | +| ENABLE_RKNPU2_BACKEND | 默认OFF,是否编译集成RKNPU2后端(RK3588/RK3568/RK3566上推荐打开) | +| ENABLE_TRT_BACKEND | 默认OFF,是否编译集成TensorRT后端(GPU上推荐打开) | +| ENABLE_OPENVINO_BACKEND | 默认OFF,是否编译集成OpenVINO后端(CPU上推荐打开) | +| ENABLE_VISION | 默认OFF,是否编译集成视觉模型的部署模块 | +| ENABLE_TEXT | 默认OFF,是否编译集成文本NLP模型的部署模块 | +| WITH_GPU | 默认OFF, 当需要在GPU上部署时,需设置为ON | +| RKNN2_TARGET_SOC | ENABLE_RKNPU2_BACKEND时才需要使用这个编译选项。无默认值, 可输入值为RK3588/RK356X, 必须填入,否则 将编译失败 | +| CUDA_DIRECTORY | 默认/usr/local/cuda, 当需要在GPU上部署时,用于指定CUDA(>=11.2)的路径 | +| TRT_DIRECTORY | 当开启TensorRT后端时,必须通过此开关指定TensorRT(>=8.4)的路径 | +| ORT_DIRECTORY | 当开启ONNX Runtime后端时,用于指定用户本地的ONNX Runtime库路径;如果不指定,编译过程会自动下载ONNX Runtime库 | +| OPENCV_DIRECTORY | 当ENABLE_VISION=ON时,用于指定用户本地的OpenCV库路径;如果不指定,编译过程会自动下载OpenCV库 | +| OPENVINO_DIRECTORY | 当开启OpenVINO后端时, 用于指定用户本地的OpenVINO库路径;如果不指定,编译过程会自动下载OpenVINO库 | diff --git a/docs/cn/build_and_install/rknpu2.md b/docs/cn/build_and_install/rknpu2.md new file mode 100644 index 000000000..61057252c --- /dev/null +++ b/docs/cn/build_and_install/rknpu2.md @@ -0,0 +1,102 @@ +# RK2代NPU部署库编译 + +## 写在前面 +FastDeploy已经初步支持RKNPU2的部署。使用的过程中,如果出现Bug请提Issues反馈。 + +## 简介 +FastDeploy当前在RK平台上支持后端引擎如下: + +| 后端 | 平台 | 支持模型格式 | 说明 | +|:------------------|:---------------------|:-------|:-------------------------------------------| +| ONNX Runtime | RK356X
RK3588 | ONNX | 编译开关`ENABLE_ORT_BACKEND`为ON或OFF控制,默认OFF | +| RKNPU2 | RK356X
RK3588 | RKNN | 编译开关`ENABLE_RKNPU2_BACKEND`为ON或OFF控制,默认OFF |
+
+
+## C++ SDK编译安装
+
+RKNPU2后端仅支持在Linux下编译,以下教程均在Linux环境下完成。
+
+### 更新驱动和安装编译时需要的环境
+
+在运行代码之前,需要先安装最新的RKNPU驱动(目前已更新至1.4.0)。为了简化安装流程,我们提供了快速安装脚本,一键即可完成安装。
+
+**方法1: 通过脚本安装**
+```bash
+# 下载解压rknpu2_device_install_1.4.0
+wget https://bj.bcebos.com/fastdeploy/third_libs/rknpu2_device_install_1.4.0.zip
+unzip rknpu2_device_install_1.4.0.zip
+
+cd rknpu2_device_install_1.4.0
+# RK3588运行以下代码
+sudo bash rknn_install_rk3588.sh
+# RK356X运行以下代码
+sudo bash rknn_install_rk356X.sh
+```
+
+**方法2: 通过gitee安装**
+```bash
+# 安装必备的包
+sudo apt update -y
+sudo apt install -y python3
+sudo apt install -y python3-dev
+sudo apt install -y python3-pip
+sudo apt install -y gcc
+sudo apt install -y python3-opencv
+sudo apt install -y python3-numpy
+sudo apt install -y cmake
+
+# 下载rknpu2
+# RK3588运行以下代码
+git clone https://gitee.com/mirrors_rockchip-linux/rknpu2.git
+sudo cp ./rknpu2/runtime/RK3588/Linux/librknn_api/aarch64/* /usr/lib
+sudo cp ./rknpu2/runtime/RK3588/Linux/rknn_server/aarch64/usr/bin/* /usr/bin/
+
+# RK356X运行以下代码
+git clone https://gitee.com/mirrors_rockchip-linux/rknpu2.git
+sudo cp ./rknpu2/runtime/RK356X/Linux/librknn_api/aarch64/* /usr/lib
+sudo cp ./rknpu2/runtime/RK356X/Linux/rknn_server/aarch64/usr/bin/* /usr/bin/
+```
+
+### 编译C++ SDK
+
+```bash
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+mkdir build && cd build
+
+# 编译配置详情见README文件,这里只介绍关键的几个配置
+# -DENABLE_ORT_BACKEND: 是否开启ONNX Runtime后端,默认关闭
+# -DENABLE_RKNPU2_BACKEND: 是否开启RKNPU2后端,默认关闭
+# -DRKNN2_TARGET_SOC: 目标板子的型号,只能输入RK356X或者RK3588,注意区分大小写
+cmake .. -DENABLE_ORT_BACKEND=ON \
+         -DENABLE_RKNPU2_BACKEND=ON \
+         -DENABLE_VISION=ON \
+         -DRKNN2_TARGET_SOC=RK3588 \
+         -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3
+make -j8
+make install
+```
+
+### 编译Python SDK
+
+```bash
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+cd python
+
+export ENABLE_ORT_BACKEND=ON
+export ENABLE_RKNPU2_BACKEND=ON
+export ENABLE_VISION=ON
+export RKNN2_TARGET_SOC=RK3588
+python3 setup.py build
+python3 setup.py bdist_wheel
+
+cd dist
+
+pip3 install fastdeploy_python-0.0.0-cp39-cp39-linux_aarch64.whl
+```
+
+## 部署模型
+
+请查看[RKNPU2部署模型教程](../faq/rknpu2/rknpu2.md)
\ No newline at end of file
diff --git a/docs/cn/faq/rknpu2/export.md b/docs/cn/faq/rknpu2/export.md
new file mode 100644
index 000000000..9399c78d5
--- /dev/null
+++ b/docs/cn/faq/rknpu2/export.md
@@ -0,0 +1,48 @@
+# 导出模型指南
+
+## 简介
+
+FastDeploy已经初步集成了ONNX模型转换RKNN模型的流程。本教程使用tools/rknpu2/export.py脚本导出模型,在导出之前需要先编写yaml配置文件。
+在进行转换前,请根据[rknn_toolkit2安装文档](./install_rknn_toolkit2.md)检查环境是否已经安装成功。
+
+## export.py 配置参数介绍
+
+| 参数名称 | 是否可以为空 | 参数作用 |
+|-----------------|------------|--------------------|
+| verbose | 是,默认值为True | 是否在屏幕上输出转换模型时的具体信息 |
+| config_path | 否 | 配置文件路径 |
+
+## config 配置文件介绍
+
+### config yaml文件模版
+
+```yaml
+model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
+output_folder: ./
+target_platform: RK3588
+normalize:
+  mean: [0.5,0.5,0.5]
+  std: [0.5,0.5,0.5]
+outputs: None
+```
+
+### config 配置参数介绍
+* model_path: 模型储存路径
+* output_folder: 模型储存文件夹名字
+* target_platform: 模型运行的目标设备,只能为RK3588或RK3568
+* normalize: 配置在NPU上的normalize操作,有std和mean两个参数
+  * std: 如果在外部做normalize操作,请配置为[1/255,1/255,1/255]
+  * mean: 如果在外部做normalize操作,请配置为[0,0,0]
+* outputs: 输出节点列表,如果使用默认输出节点,请配置为None
+
+## 如何转换模型
+在FastDeploy根目录下执行以下代码
+
+```bash
+python tools/rknpu2/export.py --config_path=./config.yaml
+```
+
+## 模型导出要注意的事项
+
+* 请不要导出带softmax和argmax的模型,这两个算子目前存在bug,请在外部进行运算
\ No newline at end of file
diff --git a/docs/cn/faq/rknpu2/install_rknn_toolkit2.md b/docs/cn/faq/rknpu2/install_rknn_toolkit2.md
new file mode 100644
index 000000000..ebebdeb6d
--- /dev/null
+++ b/docs/cn/faq/rknpu2/install_rknn_toolkit2.md
@@ -0,0 +1,49 @@
+# 安装rknn_toolkit2
+
+## 下载rknn_toolkit2
+
+rknn_toolkit2的下载一般有两种方式,以下将一一介绍:
+
+* github仓库下载
+
+  github仓库中提供了稳定版本的rknn_toolkit2下载
+  ```bash
+  git clone https://github.com/rockchip-linux/rknn-toolkit2.git
+  ```
+
+* 百度网盘下载
+
+  有些时候,如果稳定版本的rknn_toolkit2存在bug,不满足模型部署的要求,我们也可以使用百度网盘下载beta版本的rknn_toolkit2使用,其安装方式与稳定版本一致
+  ```text
+  链接:https://eyun.baidu.com/s/3eTDMk6Y 密码:rknn
+  ```
+
+## 安装rknn_toolkit2
+
+安装rknn_toolkit2时会存在依赖问题,这里介绍一下如何安装。因为rknn_toolkit2依赖一些特定版本的包,所以建议使用conda新建一个虚拟环境进行安装。
+安装conda的教程网上有很多,这里不再赘述,直接介绍如何安装rknn_toolkit2。
+
+### 下载安装需要的软件包
+```bash
+sudo apt-get install libxslt1-dev zlib1g zlib1g-dev libglib2.0-0 \
+libsm6 libgl1-mesa-glx libprotobuf-dev gcc g++
+```
+
+### 安装rknn_toolkit2环境
+```bash
+# 创建虚拟环境(注意:Python版本需要与whl包的cp标记一致,下面的whl包为cp38)
+conda create -n rknn2 python=3.8
+conda activate rknn2
+
+# rknn_toolkit2对numpy存在特定依赖,因此需要先安装numpy==1.16.6
+pip install numpy==1.16.6
+
+# 安装rknn_toolkit2-1.3.0_11912b58-cp38-cp38-linux_x86_64.whl
+cd ~/下载/rknn-toolkit2-master/packages
+pip install rknn_toolkit2-1.3.0_11912b58-cp38-cp38-linux_x86_64.whl
+```
+
+## 其他文档
+- [onnx转换rknn文档](./export.md)
\ No newline at end of file
diff --git a/docs/cn/faq/rknpu2/rknpu2.md b/docs/cn/faq/rknpu2/rknpu2.md
new file mode 100644
index 000000000..1e40585d1
--- /dev/null
+++ b/docs/cn/faq/rknpu2/rknpu2.md
@@ -0,0 +1,64 @@
+# RKNPU2模型部署
+
+## ONNX模型转换为RKNN模型
+ONNX模型不能直接调用RK芯片中的NPU进行运算,需要先把ONNX模型转换为RKNN模型,具体流程请查看[转换文档](./export.md)
+
+## RKNPU2已经支持的模型列表
+
+| 任务场景 | 模型 | 模型版本(表示已经测试的版本) | 大小 | ONNX/RKNN是否支持 | ONNX/RKNN速度(ms) |
+|------------------|-------------------|------------------------------|-----|---------------|-----------------|
+| Detection | Picodet | [Picodet-s-npu](https://bj.bcebos.com/fastdeploy/models/rknn2/picodet_s_416_coco_npu_3588.tgz) | - | True/True | 454/177 |
+| Segmentation | PP-LiteSeg | [PP_LiteSeg_T_STDC1_cityscapes](https://bj.bcebos.com/fastdeploy/models/rknn2/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_3588.tgz) | - | True/True | 6634/5598 |
+| Segmentation | PP-HumanSegV2Lite | [portrait](https://bj.bcebos.com/fastdeploy/models/rknn2/portrait_pp_humansegv2_lite_256x144_inference_model_without_softmax_3588.tgz) | - | True/True | 456/266 |
+| Segmentation | PP-HumanSegV2Lite | [human](https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz) | - | True/True | 496/256 |
+| Face Detection | SCRFD | [SCRFD-2.5G-kps-640](https://bj.bcebos.com/fastdeploy/models/rknn2/scrfd_2.5g_bnkps_shape640x640.rknn) | - | True/True | 963/142 |
+| Face Recognition | ArcFace | [ArcFace_r18](https://bj.bcebos.com/fastdeploy/models/rknn2/new_ms1mv3_arcface_r18.rknn) | - | True/True | 600/3 |
+| Face Recognition | cosFace | [cosFace_r18](https://bj.bcebos.com/fastdeploy/models/rknn2/new_glint360k_cosface_r18.rknn) | - | True/True | 600/3 |
+
+## RKNPU2 Backend推理使用教程
+
+这里以SCRFD模型为例,介绍如何使用RKNPU2 Backend推理模型。以下注释中标出的改动,均是相对于ONNX CPU推理代码的改动。
+
+```c++
+int infer_scrfd_npu() {
+  char model_path[] = "./model/scrfd_2.5g_bnkps_shape640x640.rknn";
+  char image_file[] = "./image/test_lite_face_detector_3.jpg";
+  auto option = fastdeploy::RuntimeOption();
+  // 改动1: option需要调用UseRKNPU2
+  option.UseRKNPU2();
+
+  // 改动2: 模型加载时需要传递fastdeploy::ModelFormat::RKNN参数
+  auto* model = new fastdeploy::vision::facedet::SCRFD(
      model_path, "", option, fastdeploy::ModelFormat::RKNN);
+  if (!model->Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return 0;
+  }
+
+  // 改动3(可选): RKNPU2支持使用NPU进行normalize操作,并且输入格式为nhwc格式。
+  // DisableNormalizeAndPermute操作将屏蔽预处理时的normalize操作和hwc转chw操作。
+  // 如果你使用的是已经支持的模型列表中的模型,请在Predict前调用该方法。
+  model->DisableNormalizeAndPermute();
+  auto im = cv::imread(image_file);
+  auto im_bak = im.clone();
+  fastdeploy::vision::FaceDetectionResult res;
+  clock_t start = clock();
+  if (!model->Predict(&im, &res, 0.8, 0.8)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return 0;
+  }
+  clock_t end = clock();
+  double dur = (double)(end - start);
+  printf("infer_scrfd_npu use time:%f\n", (dur / CLOCKS_PER_SEC));
+  auto vis_im = fastdeploy::vision::Visualize::VisFaceDetection(im_bak, res);
+  cv::imwrite("scrfd_rknn_vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./scrfd_rknn_vis_result.jpg" << std::endl;
+  return 0;
+}
+```
+
+## 其他关联文档
+- [rknpu2板端环境安装配置](../../build_and_install/rknpu2.md)
+- [rknn_toolkit2安装文档](./install_rknn_toolkit2.md)
+- [onnx转换rknn文档](./export.md)
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/README.md b/examples/vision/segmentation/paddleseg/rknpu2/README.md
new file mode 100644
index 000000000..5a96661b1
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/README.md
@@ -0,0 +1,52 @@
+# PaddleSeg 模型部署
+
+## 模型版本说明
+
+- [PaddleSeg develop](https://github.com/PaddlePaddle/PaddleSeg/tree/develop)
+
+目前FastDeploy支持如下模型的部署
+
+- [U-Net系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/unet/README.md)
+- [PP-LiteSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/pp_liteseg/README.md)
+- [PP-HumanSeg系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/contrib/PP-HumanSeg/README.md)
+- [FCN系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/fcn/README.md)
+- [DeepLabV3系列模型](https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.6/configs/deeplabv3/README.md)
+
+【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**,请参考[Matting模型部署](../../matting)
+
+## 准备PaddleSeg部署模型以及转换模型
+
+RKNPU部署模型前需要将模型转换成RKNN模型,其过程一般可以简化为如下步骤:
+* Paddle动态图模型 -> ONNX模型 -> RKNN模型。
+  * Paddle动态图模型 转换为 ONNX模型的过程请参考([PaddleSeg模型导出说明](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.6/contrib/PP-HumanSeg))。
+  * 对于ONNX模型 转换 RKNN模型的过程,请参考[转换文档](../../../../../docs/cn/faq/rknpu2/export.md)进行转换。
+    以PP-HumanSeg为例,在获取到ONNX模型后,其转换为RK3588模型的步骤如下:
+    * 编写config.yaml文件
+      ```yaml
+      model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
+      output_folder: ./
+      target_platform: RK3588
+      normalize:
+        mean: [0.5,0.5,0.5]
+        std: [0.5,0.5,0.5]
+      outputs: None
+      ```
+    * 执行转换代码
+      ```bash
+      python /path/to/FastDeploy/tools/rknpu2/export.py --config_path=/path/to/FastDeploy/tools/rknpu2/config/ppseg_config.yaml
+      ```
+
+## 下载预训练模型
+
+为了方便开发者的测试,下面提供了PaddleSeg导出的部分模型(导出方式为:**指定**`--input_shape`,**指定**`--output_op none`,**指定**`--without_argmax`),开发者可直接下载使用。
+
+| 任务场景 | 模型 | 模型版本(表示已经测试的版本) | 大小 | ONNX/RKNN是否支持 | ONNX/RKNN速度(ms) |
+|--------------|-------------------|------------------------------|-----|---------------|-----------------|
+| Segmentation | PP-LiteSeg | [PP_LiteSeg_T_STDC1_cityscapes](https://bj.bcebos.com/fastdeploy/models/rknn2/PP_LiteSeg_T_STDC1_cityscapes_without_argmax_infer_3588.tgz) | - | True/True | 6634/5598 |
+| Segmentation | PP-HumanSegV2Lite | [portrait](https://bj.bcebos.com/fastdeploy/models/rknn2/portrait_pp_humansegv2_lite_256x144_inference_model_without_softmax_3588.tgz) | - | True/True | 456/266 |
+| Segmentation | PP-HumanSegV2Lite | [human](https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz) | - | True/True | 496/256 |
+
+## 详细部署文档
+- [RKNN总体部署教程](../../../../../docs/cn/faq/rknpu2/rknpu2.md)
+- [C++部署](cpp)
+- [Python部署](python)
\ No newline at end of file
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/CMakeLists.txt b/examples/vision/segmentation/paddleseg/rknpu2/cpp/CMakeLists.txt
new file mode 100644
index 000000000..28161a83d
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/cpp/CMakeLists.txt
@@ -0,0 +1,36 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
+project(rknpu_test)
+
+set(CMAKE_CXX_STANDARD 14)
+
+# 指定下载解压后的fastdeploy库路径
+set(FASTDEPLOY_INSTALL_DIR "thirdpartys/fastdeploy-0.0.3")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake)
+include_directories(${FastDeploy_INCLUDE_DIRS})
+add_executable(rknpu_test infer.cc)
+target_link_libraries(rknpu_test
+        ${FastDeploy_LIBS}
+        )
+
+set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
+
+install(TARGETS rknpu_test DESTINATION ./)
+
+install(DIRECTORY model DESTINATION ./)
+install(DIRECTORY images DESTINATION ./)
+
+file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*)
+message("${FASTDEPLOY_LIBS}")
+install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib)
+
+file(GLOB ONNXRUNTIME_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/onnxruntime/lib/*)
+install(PROGRAMS ${ONNXRUNTIME_LIBS} DESTINATION lib)
+
+install(DIRECTORY ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib DESTINATION ./)
+
+file(GLOB PADDLETOONNX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddle2onnx/lib/*)
+install(PROGRAMS ${PADDLETOONNX_LIBS} DESTINATION lib)
+
+file(GLOB RKNPU2_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/rknpu2_runtime/RK3588/lib/*)
+install(PROGRAMS ${RKNPU2_LIBS} DESTINATION lib)
\ No newline at end of file
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/README.md b/examples/vision/segmentation/paddleseg/rknpu2/cpp/README.md
new file mode 100644
index 000000000..17defad01
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/cpp/README.md
@@ -0,0 +1,84 @@
+# PaddleSeg C++部署示例
+
+本目录下用于展示PaddleSeg系列模型在RKNPU2上的部署,以下的部署过程以PP-HumanSeg为例。
+
+在部署前,需确认以下两个步骤:
+
+1. 软硬件环境满足要求
+2. 根据开发环境,下载预编译部署库或者从头编译FastDeploy仓库
+
+以上步骤请参考[RK2代NPU部署库编译](../../../../../../docs/cn/build_and_install/rknpu2.md)实现
+
+## 生成基本目录文件
+
+该例程由以下几个部分组成
+```text
+.
+├── CMakeLists.txt
+├── build  # 编译文件夹
+├── images  # 存放图片的文件夹
+├── infer.cc
+├── model  # 存放模型文件的文件夹
+└── thirdpartys  # 存放sdk的文件夹
+```
+
+首先需要生成目录结构
+```bash
+mkdir build
+mkdir images
+mkdir model
+mkdir thirdpartys
+```
+
+## 编译
+
+### 编译并拷贝SDK到thirdpartys文件夹
+
+请参考[RK2代NPU部署库编译](../../../../../../docs/cn/build_and_install/rknpu2.md)编译SDK。编译完成后,将在build目录下生成fastdeploy-0.0.3目录,请移动它至thirdpartys目录下。
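+
+下面给出一个可能的拷贝命令示例(仅作示意:假设FastDeploy仓库位于/path/to/FastDeploy且已按上文完成编译,实际路径请自行调整):
+```bash
+# 将编译生成的SDK目录拷贝到本例程的thirdpartys文件夹
+cp -r /path/to/FastDeploy/build/fastdeploy-0.0.3 ./thirdpartys/
+```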
+
+### 拷贝模型文件,以及配置文件至model文件夹
+在Paddle动态图模型 -> Paddle静态图模型 -> ONNX模型的过程中,将生成ONNX文件以及对应的yaml配置文件,请将配置文件存放到model文件夹内。
+转换为RKNN后的模型文件也需要拷贝至model文件夹。这里提供了转换好的文件,输入以下命令下载使用(模型文件为RK3588版本,RK3568需要重新[转换PPSeg RKNN模型](../README.md))。
+```bash
+cd model
+wget https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
+tar xvf human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
+```
+
+### 准备测试图片至images文件夹
+```bash
+wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip
+unzip -qo images.zip
+```
+
+### 编译example
+
+```bash
+cd build
+cmake ..
+make -j8
+make install
+```
+
+## 运行例程
+
+```bash
+cd ./build/install
+./rknpu_test
+```
+
+## 运行结果展示
+运行后将在install文件夹下生成human_pp_humansegv2_lite_npu_result.jpg文件,如下图:
+![](https://user-images.githubusercontent.com/58363586/198875853-72821ad1-d4f7-41e3-b616-bef43027de3c.jpg)
+
+## 注意事项
+RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作会在转RKNN模型时内嵌到模型中。因此我们在使用FastDeploy部署时,需要先调用`DisableNormalizeAndPermute`(C++)或`disable_normalize_and_permute`(Python),在预处理阶段禁用归一化以及数据格式的转换。
+
+- [模型介绍](../../)
+- [Python部署](../python)
+- [转换PPSeg RKNN模型文档](../README.md)
\ No newline at end of file
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc b/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc
new file mode 100644
index 000000000..b379a5739
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/cpp/infer.cc
@@ -0,0 +1,84 @@
+#include <iostream>
+#include <string>
+#include "fastdeploy/vision.h"
+
+void InferHumanPPHumansegv2Lite(const std::string& device = "cpu");
+
+int main() {
+  InferHumanPPHumansegv2Lite("npu");
+  return 0;
+}
+
+fastdeploy::RuntimeOption GetOption(const std::string& device) {
+  auto option = fastdeploy::RuntimeOption();
+  if (device == "npu") {
+    option.UseRKNPU2();
+  } else {
+    option.UseCpu();
+  }
+  return option;
+}
+
+fastdeploy::ModelFormat GetFormat(const std::string& device) {
+  auto format = fastdeploy::ModelFormat::ONNX;
+  if (device == "npu") {
+    format = fastdeploy::ModelFormat::RKNN;
+  } else {
+    format = fastdeploy::ModelFormat::ONNX;
+  }
+  return format;
+}
+
+std::string GetModelPath(std::string& model_path, const std::string& device) {
+  if (device == "npu") {
+    model_path += "rknn";
+  } else {
+    model_path += "onnx";
+  }
+  return model_path;
+}
+
+void InferHumanPPHumansegv2Lite(const std::string& device) {
+  std::string model_file =
+      "./model/human_pp_humansegv2_lite_192x192_pretrained_3588/"
+      "human_pp_humansegv2_lite_192x192_pretrained_3588.";
+  std::string params_file;
+  std::string config_file =
+      "./model/human_pp_humansegv2_lite_192x192_pretrained_3588/deploy.yaml";
+
+  fastdeploy::RuntimeOption option = GetOption(device);
+  fastdeploy::ModelFormat format = GetFormat(device);
+  model_file = GetModelPath(model_file, device);
+  auto model = fastdeploy::vision::segmentation::PaddleSegModel(
+      model_file, params_file, config_file, option, format);
+
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+  auto image_file = "./images/portrait_heng.jpg";
+  auto im = cv::imread(image_file);
+
+  if (device == "npu") {
+    model.DisableNormalizeAndPermute();
+  }
+
+  fastdeploy::vision::SegmentationResult res;
+  clock_t start = clock();
+  if (!model.Predict(&im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+  clock_t end = clock();
+  auto dur = (double)(end - start);
+  printf("infer_human_pp_humansegv2_lite_npu use time:%f\n",
+         (dur / CLOCKS_PER_SEC));
+
+  std::cout << res.Str() << std::endl;
+  auto vis_im = fastdeploy::vision::VisSegmentation(im, res);
+  cv::imwrite("human_pp_humansegv2_lite_npu_result.jpg", vis_im);
+  std::cout
+      << "Visualized result saved in ./human_pp_humansegv2_lite_npu_result.jpg"
+      << std::endl;
+}
\ No newline at end of file
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/python/README.md b/examples/vision/segmentation/paddleseg/rknpu2/python/README.md
new file mode 100644
index 000000000..6e8eaf199
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/python/README.md
@@ -0,0 +1,44 @@
+# PaddleSeg Python部署示例
+
+在部署前,需确认软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/rknpu2.md)
+
+【注意】如你部署的为**PP-Matting**、**PP-HumanMatting**以及**ModNet**,请参考[Matting模型部署](../../../matting)
+
+本目录下提供`infer.py`快速完成PP-HumanSeg在RKNPU上部署的示例。执行如下脚本即可完成
+
+```bash
+# 下载部署示例代码
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/examples/vision/segmentation/paddleseg/rknpu2/python
+
+# 下载模型
+wget https://bj.bcebos.com/fastdeploy/models/rknn2/human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
+tar xvf human_pp_humansegv2_lite_192x192_pretrained_3588.tgz
+
+# 下载图片
+wget https://paddleseg.bj.bcebos.com/dygraph/pp_humanseg_v2/images.zip
+unzip images.zip
+
+# 推理
+python3 infer.py --model_file ./human_pp_humansegv2_lite_192x192_pretrained_3588/human_pp_humansegv2_lite_192x192_pretrained_3588.rknn \
+                 --config_file ./human_pp_humansegv2_lite_192x192_pretrained_3588/deploy.yaml \
+                 --image images/portrait_heng.jpg
+```
+
+运行完成可视化结果如下图所示
+
+(此处原为可视化结果展示图)
+
+## 注意事项
+RKNPU上对模型的输入要求是使用NHWC格式,且图片归一化操作会在转RKNN模型时内嵌到模型中。因此我们在使用FastDeploy部署时,需要先调用`DisableNormalizeAndPermute`(C++)或`disable_normalize_and_permute`(Python),在预处理阶段禁用归一化以及数据格式的转换。
+
+## 其它文档
+
+- [PaddleSeg 模型介绍](..)
+- [PaddleSeg C++部署](../cpp)
+- [模型预测结果说明](../../../../../../docs/api/vision_results/)
+- [转换PPSeg RKNN模型文档](../README.md)
diff --git a/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py b/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py
new file mode 100644
index 000000000..2b6034a33
--- /dev/null
+++ b/examples/vision/segmentation/paddleseg/rknpu2/python/infer.py
@@ -0,0 +1,44 @@
+import fastdeploy as fd
+import cv2
+
+
+def parse_arguments():
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_file", required=True, help="Path of PaddleSeg model.")
+    parser.add_argument(
+        "--config_file", required=True, help="Path of PaddleSeg config.")
+    parser.add_argument(
+        "--image", type=str, required=True, help="Path of test image file.")
+    return parser.parse_args()
+
+
+def build_option(args):
+    option = fd.RuntimeOption()
+    option.use_rknpu2()
+    return option
+
+
+args = parse_arguments()
+
+# 配置runtime,加载模型
+runtime_option = build_option(args)
+model_file = args.model_file
+params_file = ""
+config_file = args.config_file
+model = fd.vision.segmentation.PaddleSegModel(
+    model_file,
+    params_file,
+    config_file,
+    runtime_option=runtime_option,
+    model_format=fd.ModelFormat.RKNN)
+
+# RKNN模型的归一化与排布转换已内嵌到模型中,需在预处理阶段禁用
+model.disable_normalize_and_permute()
+
+# 预测图片分割结果
+im = cv2.imread(args.image)
+result = model.predict(im.copy())
+print(result)
+
+# 可视化结果
+vis_im = fd.vision.vis_segmentation(im, result, weight=0.5)
+cv2.imwrite("vis_img.png", vis_im)
diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc
new file mode 100644
index 000000000..1df1efcc4
--- /dev/null
+++ b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc
@@ -0,0 +1,425 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
+
+namespace fastdeploy {
+RKNPU2Backend::~RKNPU2Backend() {
+  if (input_attrs != nullptr) {
+    free(input_attrs);
+  }
+  if (output_attrs != nullptr) {
+    free(output_attrs);
+  }
+}
+/***************************************************************
+ *  @name       GetSDKAndDeviceVersion
+ *  @brief      get RKNN sdk and device version
+ *  @param      None
+ *  @return     bool
+ *  @note       None
+ ***************************************************************/
+bool RKNPU2Backend::GetSDKAndDeviceVersion() {
+  int ret;
+  // get sdk and device version
+  ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &sdk_ver, sizeof(sdk_ver));
+  if (ret != RKNN_SUCC) {
+    FDERROR << "rknn_query fail! ret=" << ret << std::endl;
+    return false;
+  }
+  FDINFO << "rknn_api/rknnrt version: " << sdk_ver.api_version
+         << ", driver version: " << sdk_ver.drv_version << std::endl;
+  return true;
+}
+
+/***************************************************************
+ *  @name       BuildOption
+ *  @brief      save option
+ *  @param      RKNPU2BackendOption
+ *  @note       None
+ ***************************************************************/
+void RKNPU2Backend::BuildOption(const RKNPU2BackendOption& option) {
+  this->option_ = option;
+  // save cpu_name
+  this->option_.cpu_name = option.cpu_name;
+
+  // save core_mask
+  this->option_.core_mask = option.core_mask;
+}
+
+/***************************************************************
+ *  @name       InitFromRKNN
+ *  @brief      Initialize RKNN model
+ *  @param      model_file: Binary data for the RKNN model or the path of RKNN model.
+ *              option: config
+ *  @return     bool
+ *  @note       None
+ ***************************************************************/
+bool RKNPU2Backend::InitFromRKNN(const std::string& model_file,
+                                 const RKNPU2BackendOption& option) {
+  // LoadModel
+  if (!this->LoadModel((char*)model_file.data())) {
+    FDERROR << "load model failed" << std::endl;
+    return false;
+  }
+
+  // GetSDKAndDeviceVersion
+  if (!this->GetSDKAndDeviceVersion()) {
+    FDERROR << "get SDK and device version failed" << std::endl;
+    return false;
+  }
+
+  // BuildOption
+  this->BuildOption(option);
+
+  // SetCoreMask if RK3588
+  if (this->option_.cpu_name == rknpu2::CpuName::RK3588) {
+    if (!this->SetCoreMask(option_.core_mask)) {
+      FDERROR << "set core mask failed" << std::endl;
+      return false;
+    }
+  }
+
+  // GetModelInputOutputInfos
+  if (!this->GetModelInputOutputInfos()) {
+    FDERROR << "get model input output infos failed" << std::endl;
+    return false;
+  }
+
+  return true;
+}
+
+/***************************************************************
+ *  @name       SetCoreMask
+ *  @brief      set NPU core for model
+ *  @param      core_mask: The specification of NPU core setting.
+ *  @return     bool
+ *  @note       Only support RK3588
+ ***************************************************************/
+bool RKNPU2Backend::SetCoreMask(rknpu2::CoreMask& core_mask) const {
+  int ret = rknn_set_core_mask(ctx, static_cast<rknn_core_mask>(core_mask));
+  if (ret != RKNN_SUCC) {
+    FDERROR << "rknn_set_core_mask fail! ret=" << ret << std::endl;
+    return false;
+  }
+  return true;
+}
+
+/***************************************************************
+ *  @name       LoadModel
+ *  @brief      read rknn model
+ *  @param      model: Binary data for the RKNN model or the path of RKNN model.
+ *  @return     bool
+ *  @note       None
+ ***************************************************************/
+bool RKNPU2Backend::LoadModel(void* model) {
+  int ret = RKNN_SUCC;
+  ret = rknn_init(&ctx, model, 0, 0, nullptr);
+  if (ret != RKNN_SUCC) {
+    FDERROR << "rknn_init fail! ret=" << ret << std::endl;
+    return false;
+  }
+  return true;
+}
+
+/***************************************************************
+ *  @name       GetModelInputOutputInfos
+ *  @brief      Get the detailed input and output infos of Model
+ *  @param      None
+ *  @return     bool
+ *  @note       None
+ ***************************************************************/
+bool RKNPU2Backend::GetModelInputOutputInfos() {
+  int ret = RKNN_SUCC;
+
+  // Get the number of model inputs and outputs
+  ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
+  if (ret != RKNN_SUCC) {
+    return false;
+  }
+
+  // Get detailed input parameters
+  input_attrs =
+      (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_input);
+  memset(input_attrs, 0, io_num.n_input * sizeof(rknn_tensor_attr));
+  inputs_desc_.resize(io_num.n_input);
+  for (uint32_t i = 0; i < io_num.n_input; i++) {
+    input_attrs[i].index = i;
+    // query info
+    ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]),
+                     sizeof(rknn_tensor_attr));
+    if (ret != RKNN_SUCC) {
+      FDERROR << "rknn_query fail! ret=" << ret << std::endl;
+      return false;
+    }
+    std::string temp_name = input_attrs[i].name;
+    std::vector<int> temp_shape{};
+    temp_shape.resize(input_attrs[i].n_dims);
+    for (int j = 0; j < input_attrs[i].n_dims; j++) {
+      temp_shape[j] = (int)input_attrs[i].dims[j];
+    }
+
+    FDDataType temp_dtype =
+        fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
+            input_attrs[i].type);
+    TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
+    inputs_desc_[i] = temp_input_info;
+  }
+
+  // Get detailed output parameters
+  output_attrs =
+      (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
+  memset(output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
+  outputs_desc_.resize(io_num.n_output);
+  for (uint32_t i = 0; i < io_num.n_output; i++) {
+    output_attrs[i].index = i;
+    // query info
+    ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]),
+                     sizeof(rknn_tensor_attr));
+    if (ret != RKNN_SUCC) {
+      FDERROR << "rknn_query fail! ret=" << ret << std::endl;
+      return false;
+    }
+    std::string temp_name = output_attrs[i].name;
+    std::vector<int> temp_shape{};
+    temp_shape.resize(output_attrs[i].n_dims);
+    for (int j = 0; j < output_attrs[i].n_dims; j++) {
+      temp_shape[j] = (int)output_attrs[i].dims[j];
+    }
+    FDDataType temp_dtype =
+        fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
+            output_attrs[i].type);
+    TensorInfo temp_output_info = {temp_name, temp_shape, temp_dtype};
+    outputs_desc_[i] = temp_output_info;
+  }
+  return true;
+}
+
+/***************************************************************
+ *  @name       DumpTensorAttr
+ *  @brief      Print the model's detailed inputs and outputs
+ *  @param      rknn_tensor_attr
+ *  @return     None
+ *  @note       None
+ ***************************************************************/
+void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
+  printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
+         "n_elems=%d, size=%d, fmt=%s, type=%s, "
+         "qnt_type=%s, zp=%d, scale=%f\n",
+         attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
+         attr.dims[2], attr.dims[3], attr.n_elems, attr.size,
+         get_format_string(attr.fmt), get_type_string(attr.type),
+         get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale);
+}
+
+TensorInfo RKNPU2Backend::GetInputInfo(int index) {
+  FDASSERT(index < NumInputs(),
+           "The index: %d should less than the number of inputs: %d.", index,
+           NumInputs())
+  return inputs_desc_[index];
+}
+
+std::vector<TensorInfo> RKNPU2Backend::GetInputInfos() { return inputs_desc_; }
+
+TensorInfo RKNPU2Backend::GetOutputInfo(int index) {
+  FDASSERT(index < NumOutputs(),
+           "The index: %d should less than the number of outputs %d.", index,
+           NumOutputs())
+  return outputs_desc_[index];
+}
+
+std::vector<TensorInfo> RKNPU2Backend::GetOutputInfos() {
+  return outputs_desc_;
+}
+
+bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
+                          std::vector<FDTensor>* outputs) {
+  int ret = RKNN_SUCC;
+  // Judge whether the input and output size are the same
+  if (inputs.size() != inputs_desc_.size()) {
+    FDERROR << "[RKNPU2Backend] Size of the inputs(" << inputs.size()
+            << ") should keep same with the inputs of this model("
+            << inputs_desc_.size() << ")." << std::endl;
+    return false;
+  }
+
+  // Only a single input is supported
+  if (inputs.size() > 1) {
+    FDERROR << "[RKNPU2Backend] Only one input is supported." << std::endl;
+    return false;
+  }
+
+  // Judge whether the input and output types are the same
+  rknn_tensor_type input_type =
+      fastdeploy::RKNPU2Backend::FDDataTypeToRknnTensorType(inputs[0].dtype);
+  if (input_type != input_attrs[0].type) {
+    FDWARNING << "The input tensor type does not match the model's input type. "
+              << "The model expects " << get_type_string(input_attrs[0].type)
+              << ", but inputs[0].type is " << get_type_string(input_type)
+              << "." << std::endl;
+  }
+
+  rknn_tensor_format input_layout =
+      RKNN_TENSOR_NHWC;  // RK3588 only support NHWC
+  input_attrs[0].type = input_type;
+  input_attrs[0].fmt = input_layout;
+  input_attrs[0].size = inputs[0].Nbytes();
+  input_attrs[0].size_with_stride = inputs[0].Nbytes();
+  input_attrs[0].pass_through = 0;
+
+  // create input tensor memory
+  rknn_tensor_mem* input_mems[1];
+  input_mems[0] = rknn_create_mem(ctx, inputs[0].Nbytes());
+  if (input_mems[0] == nullptr) {
+    FDERROR << "rknn_create_mem input_mems error." << std::endl;
+    return false;
+  }
+
+  // Copy input data to input tensor memory
+  uint32_t width = input_attrs[0].dims[2];
+  uint32_t stride = input_attrs[0].w_stride;
+  if (width == stride) {
+    if (inputs[0].Data() == nullptr) {
+      FDERROR << "inputs[0].Data is NULL." << std::endl;
+      return false;
+    }
+    memcpy(input_mems[0]->virt_addr, inputs[0].Data(), inputs[0].Nbytes());
+  } else {
+    FDERROR << "[RKNPU2Backend] only support width == stride." << std::endl;
+    return false;
+  }
+
+  // Create output tensor memory
+  rknn_tensor_mem* output_mems[io_num.n_output];
+  for (uint32_t i = 0; i < io_num.n_output; ++i) {
+    // Most post-processing does not support the fp16 format.
+    // The unified output here is float32
+    uint32_t output_size = output_attrs[i].n_elems * sizeof(float);
+    output_mems[i] = rknn_create_mem(ctx, output_size);
+  }
+
+  // Set input tensor memory
+  ret = rknn_set_io_mem(ctx, input_mems[0], &input_attrs[0]);
+  if (ret != RKNN_SUCC) {
+    FDERROR << "input tensor memory rknn_set_io_mem fail! ret=" << ret
+            << std::endl;
+    return false;
+  }
+
+  // Set output tensor memory
+  for (uint32_t i = 0; i < io_num.n_output; ++i) {
+    // The output type depends on the model by default; float32 is required
+    // here for the post-processing (e.g. computing top5)
+    output_attrs[i].type = RKNN_TENSOR_FLOAT32;
+    ret = rknn_set_io_mem(ctx, output_mems[i], &output_attrs[i]);
+    // set output memory and attribute
+    if (ret != RKNN_SUCC) {
+      FDERROR << "output tensor memory rknn_set_io_mem fail! ret=" << ret
+              << std::endl;
+      return false;
+    }
+  }
+
+  // run rknn
+  ret = rknn_run(ctx, nullptr);
+  if (ret != RKNN_SUCC) {
+    FDERROR << "rknn_run fail! ret=" << ret << std::endl;
+    return false;
+  }
+  rknn_destroy_mem(ctx, input_mems[0]);
+
+  // get result
+  outputs->resize(outputs_desc_.size());
+  std::vector<int64_t> temp_shape(4);
+  for (size_t i = 0; i < outputs_desc_.size(); ++i) {
+    temp_shape.resize(outputs_desc_[i].shape.size());
+    for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) {
+      temp_shape[j] = outputs_desc_[i].shape[j];
+    }
+    (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
+                         outputs_desc_[i].name);
+    memcpy((*outputs)[i].MutableData(), (float*)output_mems[i]->virt_addr,
+           (*outputs)[i].Nbytes());
+    rknn_destroy_mem(ctx, output_mems[i]);
+  }
+
+  return true;
+}
+
+/***************************************************************
+ *  @name       RknnTensorTypeToFDDataType
+ *  @brief      Convert rknn_tensor_type to FDDataType
+ *  @param      rknn_tensor_type
+ *  @return     FDDataType
+ *  @note       Most post-processing does not support the fp16 format.
+ *              Therefore, if the input is FP16, the output will be FP32.
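+ *              For example, an RKNN_TENSOR_FLOAT16 tensor is therefore
+ *              reported as FDDataType::FP32 by this function.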
+ ***************************************************************/
+FDDataType RKNPU2Backend::RknnTensorTypeToFDDataType(rknn_tensor_type type) {
+  if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT16) {
+    return FDDataType::FP32;
+  }
+  if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT32) {
+    return FDDataType::FP32;
+  }
+  if (type == rknn_tensor_type::RKNN_TENSOR_INT8) {
+    return FDDataType::INT8;
+  }
+  if (type == rknn_tensor_type::RKNN_TENSOR_INT16) {
+    return FDDataType::INT16;
+  }
+  if (type == rknn_tensor_type::RKNN_TENSOR_INT32) {
+    return FDDataType::INT32;
+  }
+  if (type == rknn_tensor_type::RKNN_TENSOR_UINT8) {
+    return FDDataType::UINT8;
+  }
+  if (type == rknn_tensor_type::RKNN_TENSOR_BOOL) {
+    return FDDataType::BOOL;
+  }
+  FDERROR << "FDDataType doesn't support this type" << std::endl;
+  return FDDataType::UNKNOWN1;
+}
+
+/***************************************************************
+ *  @name       FDDataTypeToRknnTensorType
+ *  @brief      Convert FDDataType to rknn_tensor_type
+ *  @param      FDDataType
+ *  @return     rknn_tensor_type
+ *  @note       None
+ ***************************************************************/
+rknn_tensor_type
+RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) {
+  if (type == FDDataType::FP16) {
+    return rknn_tensor_type::RKNN_TENSOR_FLOAT16;
+  }
+  if (type == FDDataType::FP32) {
+    return rknn_tensor_type::RKNN_TENSOR_FLOAT32;
+  }
+  if (type == FDDataType::INT8) {
+    return rknn_tensor_type::RKNN_TENSOR_INT8;
+  }
+  if (type == FDDataType::INT16) {
+    return rknn_tensor_type::RKNN_TENSOR_INT16;
+  }
+  if (type == FDDataType::INT32) {
+    return rknn_tensor_type::RKNN_TENSOR_INT32;
+  }
+  if (type == FDDataType::UINT8) {
+    return rknn_tensor_type::RKNN_TENSOR_UINT8;
+  }
+  if (type == FDDataType::BOOL) {
+    return rknn_tensor_type::RKNN_TENSOR_BOOL;
+  }
+  FDERROR << "rknn_tensor_type doesn't support this type" << std::endl;
+  return RKNN_TENSOR_TYPE_MAX;
+}
+}  // namespace fastdeploy
\ No newline at end of file
diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h
new file mode 100644
index 000000000..68467294d
--- /dev/null
+++ b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h
@@ -0,0 +1,96 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include "fastdeploy/backends/backend.h"
+#include "fastdeploy/core/fd_tensor.h"
+#include "rknn_api.h"  // NOLINT
+#include "rknpu2_config.h"
+#include <cstring>  // for memset
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace fastdeploy {
+struct RKNPU2BackendOption {
+  rknpu2::CpuName cpu_name = rknpu2::CpuName::RK3588;
+
+  // The specification of NPU core setting. It has the following choices:
+  // RKNN_NPU_CORE_AUTO: Referring to automatic mode, meaning that it will
+  // select the idle core inside the NPU.
+  // RKNN_NPU_CORE_0: Running on the NPU0 core.
+  // RKNN_NPU_CORE_1: Running on the NPU1 core.
+  // RKNN_NPU_CORE_2: Running on the NPU2 core.
+  // RKNN_NPU_CORE_0_1: Running on both the NPU0 and NPU1 cores simultaneously.
+  // RKNN_NPU_CORE_0_1_2: Running on the NPU0, NPU1 and NPU2 cores simultaneously.
+  rknpu2::CoreMask core_mask = rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
+};
+
+class RKNPU2Backend : public BaseBackend {
+ public:
+  RKNPU2Backend() = default;
+
+  virtual ~RKNPU2Backend();
+
+  // RKNN API
+  bool LoadModel(void* model);
+
+  bool GetSDKAndDeviceVersion();
+
+  bool SetCoreMask(rknpu2::CoreMask& core_mask) const;
+
+  bool GetModelInputOutputInfos();
+
+  // BaseBackend API
+  void BuildOption(const RKNPU2BackendOption& option);
+
+  bool InitFromRKNN(const std::string& model_file,
+                    const RKNPU2BackendOption& option = RKNPU2BackendOption());
+
+  int NumInputs() const override {
+    return static_cast<int>(inputs_desc_.size());
+  }
+
+  int NumOutputs() const override {
+    return static_cast<int>(outputs_desc_.size());
+  }
+
+  TensorInfo GetInputInfo(int index) override;
+  TensorInfo GetOutputInfo(int index) override;
+  std::vector<TensorInfo> GetInputInfos() override;
+  std::vector<TensorInfo> GetOutputInfos() override;
+  bool Infer(std::vector<FDTensor>& inputs,
+             std::vector<FDTensor>* outputs) override;
+
+ private:
+  // The object of rknn context.
+  rknn_context ctx{};
+  // The structure rknn_sdk_version is used to indicate the version
+  // information of the RKNN SDK.
+  rknn_sdk_version sdk_ver{};
+  // The structure rknn_input_output_num represents the number of
+  // input and output tensors.
+  rknn_input_output_num io_num{};
+  std::vector<TensorInfo> inputs_desc_;
+  std::vector<TensorInfo> outputs_desc_;
+
+  rknn_tensor_attr* input_attrs = nullptr;
+  rknn_tensor_attr* output_attrs = nullptr;
+
+  RKNPU2BackendOption option_;
+
+  static void DumpTensorAttr(rknn_tensor_attr& attr);
+  static FDDataType RknnTensorTypeToFDDataType(rknn_tensor_type type);
+  static rknn_tensor_type FDDataTypeToRknnTensorType(FDDataType type);
+};
+}  // namespace fastdeploy
diff --git a/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h b/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h
new file mode 100644
index 000000000..9b38b5a8b
--- /dev/null
+++ b/fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h
@@ -0,0 +1,40 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef RKNPU2_CONFIG_H
+#define RKNPU2_CONFIG_H
+
+namespace fastdeploy {
+namespace rknpu2 {
+typedef enum _rknpu2_cpu_name {
+  RK356X = 0, /* run on RK356X. */
+  RK3588 = 1, /* default, run on RK3588. */
+  UNDEFINED,
+} CpuName;
+
+/*! RKNPU2 core mask for mobile device. */
+typedef enum _rknpu2_core_mask {
+  RKNN_NPU_CORE_AUTO = 0,  ///< default, run on an NPU core chosen automatically.
+  RKNN_NPU_CORE_0 = 1,     ///< run on NPU core 0.
+  RKNN_NPU_CORE_1 = 2,     ///< run on NPU core 1.
+  RKNN_NPU_CORE_2 = 4,     ///< run on NPU core 2.
+  RKNN_NPU_CORE_0_1 =
+      RKNN_NPU_CORE_0 | RKNN_NPU_CORE_1,  ///< run on NPU core 0 and core 1.
+ RKNN_NPU_CORE_0_1_2 = + RKNN_NPU_CORE_0_1 | RKNN_NPU_CORE_2, ///< run on NPU core 1 and core 2. + RKNN_NPU_CORE_UNDEFINED, +} CoreMask; +} // namespace RKNN +} // namespace fastdeploy +#endif //RKNPU2_CONFIG_H diff --git a/fastdeploy/core/fd_type.cc b/fastdeploy/core/fd_type.cc index afebfd973..45ca90a1b 100644 --- a/fastdeploy/core/fd_type.cc +++ b/fastdeploy/core/fd_type.cc @@ -53,6 +53,9 @@ std::string Str(const Device& d) { case Device::GPU: out = "Device::GPU"; break; + case Device::RKNPU: + out = "Device::RKNPU"; + break; case Device::IPU: out = "Device::IPU"; break; @@ -70,6 +73,9 @@ std::ostream& operator<<(std::ostream& out,const Device& d){ case Device::GPU: out << "Device::GPU"; break; + case Device::RKNPU: + out << "Device::RKNPU"; + break; default: out << "Device::UNKOWN"; } diff --git a/fastdeploy/core/fd_type.h b/fastdeploy/core/fd_type.h index e98d0b9ec..5236601b0 100644 --- a/fastdeploy/core/fd_type.h +++ b/fastdeploy/core/fd_type.h @@ -22,7 +22,7 @@ namespace fastdeploy { -enum FASTDEPLOY_DECL Device { CPU, GPU, IPU }; +enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU}; FASTDEPLOY_DECL std::string Str(const Device& d); diff --git a/fastdeploy/fastdeploy_model.cc b/fastdeploy/fastdeploy_model.cc index b95c5bde8..c3f0641d2 100644 --- a/fastdeploy/fastdeploy_model.cc +++ b/fastdeploy/fastdeploy_model.cc @@ -41,6 +41,7 @@ bool FastDeployModel::InitRuntime() { #ifndef WITH_IPU use_ipu = false; #endif + bool use_rknpu = (runtime_option.device == Device::RKNPU); // whether the model is supported by the setted backend bool is_supported = false; @@ -51,6 +52,13 @@ bool FastDeployModel::InitRuntime() { break; } } + } else if (use_rknpu) { + for (auto& item : valid_rknpu_backends) { + if (item == runtime_option.backend) { + is_supported = true; + break; + } + } } else if(use_ipu) { for (auto& item : valid_ipu_backends) { if (item == runtime_option.backend) { @@ -101,6 +109,8 @@ bool FastDeployModel::InitRuntime() { << std::endl; return false; #endif + } else if (runtime_option.device == Device::RKNPU) { + return CreateRKNPUBackend(); } else if (runtime_option.device == Device::IPU) { #ifdef WITH_IPU return CreateIpuBackend(); @@ -110,7 +120,7 @@ bool FastDeployModel::InitRuntime() { return false; #endif } - FDERROR << "Only support CPU/GPU now." << std::endl; + FDERROR << "Only support CPU/GPU/NPU now." << std::endl; return false; } @@ -138,7 +148,7 @@ bool FastDeployModel::CreateCpuBackend() { } bool FastDeployModel::CreateGpuBackend() { - if (valid_gpu_backends.size() == 0) { + if (valid_gpu_backends.empty()) { FDERROR << "There's no valid gpu backends for model: " << ModelName() << std::endl; return false; @@ -161,6 +171,30 @@ bool FastDeployModel::CreateGpuBackend() { return false; } +bool FastDeployModel::CreateRKNPUBackend() { + if (valid_rknpu_backends.empty()) { + FDERROR << "There's no valid npu backends for model: " << ModelName() + << std::endl; + return false; + } + + for (size_t i = 0; i < valid_rknpu_backends.size(); ++i) { + if (!IsBackendAvailable(valid_rknpu_backends[i])) { + continue; + } + runtime_option.backend = valid_rknpu_backends[i]; + runtime_ = std::unique_ptr(new Runtime()); + if (!runtime_->Init(runtime_option)) { + return false; + } + runtime_initialized_ = true; + return true; + } + FDERROR << "Cannot find an available npu backend to load this model." 
+          << std::endl;
+  return false;
+}
+
 bool FastDeployModel::CreateIpuBackend() {
   if (valid_ipu_backends.size() == 0) {
     FDERROR << "There's no valid ipu backends for model: " << ModelName()
diff --git a/fastdeploy/fastdeploy_model.h b/fastdeploy/fastdeploy_model.h
index 3f5ff4c35..6a2c13032 100644
--- a/fastdeploy/fastdeploy_model.h
+++ b/fastdeploy/fastdeploy_model.h
@@ -38,6 +38,12 @@ class FASTDEPLOY_DECL FastDeployModel {
   /** Model's valid ipu backends. This member defined all the ipu backends have successfully tested for the model
    */
   std::vector<Backend> valid_ipu_backends = {Backend::PDINFER};
+
+  /** Model's valid RKNPU backends. This member defines all the RKNPU backends that have been successfully tested for the model
+   */
+  std::vector<Backend> valid_rknpu_backends = {};
+
   /// Get number of inputs for this model
   virtual int NumInputsOfRuntime() { return runtime_->NumInputs(); }
   /// Get number of outputs for this model
@@ -99,6 +105,8 @@
   virtual bool CreateCpuBackend();
   virtual bool CreateGpuBackend();
   virtual bool CreateIpuBackend();
+  virtual bool CreateRKNPUBackend();
+
   bool initialized = false;
   std::vector<Backend> valid_external_backends;
diff --git a/fastdeploy/pybind/main.cc.in b/fastdeploy/pybind/main.cc.in
index 222d75fd9..74fe90433 100644
--- a/fastdeploy/pybind/main.cc.in
+++ b/fastdeploy/pybind/main.cc.in
@@ -164,6 +164,9 @@ PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
       m.def_submodule("text", "Text module of FastDeploy.");
   BindText(text_module);
 #endif
+  auto rknpu2_module =
+      m.def_submodule("rknpu2", "RKNPU2 config module of FastDeploy.");
+  BindRKNPU2Config(rknpu2_module);
 }
 }  // namespace fastdeploy
diff --git a/fastdeploy/pybind/main.h b/fastdeploy/pybind/main.h
index 4cd21b7ea..4499b3e0f 100644
--- a/fastdeploy/pybind/main.h
+++ b/fastdeploy/pybind/main.h
@@ -39,6 +39,7 @@ void BindBackend(pybind11::module&);
 void BindVision(pybind11::module&);
 void BindText(pybind11::module& m);
 void BindPipeline(pybind11::module& m);
+void BindRKNPU2Config(pybind11::module&);
 
 pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);
diff --git a/fastdeploy/pybind/rknpu2_config_pybind.cc b/fastdeploy/pybind/rknpu2_config_pybind.cc
new file mode 100644
index 000000000..4880b2db6
--- /dev/null
+++ b/fastdeploy/pybind/rknpu2_config_pybind.cc
@@ -0,0 +1,33 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
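+//
+// Binds the RKNPU2 config enums to Python. They are registered under the
+// "rknpu2" submodule created in pybind/main.cc.in, so from Python the target
+// SoC and NPU core can be selected as, e.g. (illustrative):
+//   option.use_rknpu2(rknpu2.CpuName.RK3588, rknpu2.CoreMask.RKNN_NPU_CORE_0)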
+#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_config.h" +#include "fastdeploy/pybind/main.h" +namespace fastdeploy { +void BindRKNPU2Config(pybind11::module& m) { + pybind11::enum_(m, "CpuName", pybind11::arithmetic(), + "CpuName for inference.") + .value("RK356X", fastdeploy::rknpu2::CpuName::RK356X) + .value("RK3588", fastdeploy::rknpu2::CpuName::RK3588) + .value("UNDEFINED", fastdeploy::rknpu2::CpuName::UNDEFINED); + pybind11::enum_(m, "CoreMask", pybind11::arithmetic(), + "CoreMask for inference.") + .value("RKNN_NPU_CORE_AUTO", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO) + .value("RKNN_NPU_CORE_0", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0) + .value("RKNN_NPU_CORE_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_1) + .value("RKNN_NPU_CORE_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_2) + .value("RKNN_NPU_CORE_0_1", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1) + .value("RKNN_NPU_CORE_0_1_2", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0_1_2) + .value("RKNN_NPU_CORE_UNDEFINED", fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_UNDEFINED); +} +} // namespace fastdeploy \ No newline at end of file diff --git a/fastdeploy/pybind/runtime.cc b/fastdeploy/pybind/runtime.cc index 41afbcdac..a694be970 100755 --- a/fastdeploy/pybind/runtime.cc +++ b/fastdeploy/pybind/runtime.cc @@ -22,6 +22,7 @@ void BindRuntime(pybind11::module& m) { .def("set_model_path", &RuntimeOption::SetModelPath) .def("use_gpu", &RuntimeOption::UseGpu) .def("use_cpu", &RuntimeOption::UseCpu) + .def("use_rknpu2", &RuntimeOption::UseRKNPU2) .def("set_external_stream", &RuntimeOption::SetExternalStream) .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum) .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend) @@ -174,17 +175,20 @@ void BindRuntime(pybind11::module& m) { .value("TRT", Backend::TRT) .value("POROS", Backend::POROS) .value("PDINFER", Backend::PDINFER) + .value("RKNPU2", Backend::RKNPU2) .value("LITE", Backend::LITE); pybind11::enum_(m, "ModelFormat", pybind11::arithmetic(), "ModelFormat for inference.") .value("PADDLE", ModelFormat::PADDLE) .value("TORCHSCRIPT", ModelFormat::TORCHSCRIPT) + .value("RKNN", ModelFormat::RKNN) .value("ONNX", ModelFormat::ONNX); pybind11::enum_(m, "Device", pybind11::arithmetic(), "Device for inference.") .value("CPU", Device::CPU) .value("GPU", Device::GPU) - .value("IPU", Device::IPU); + .value("IPU", Device::IPU) + .value("RKNPU", Device::RKNPU); pybind11::enum_(m, "FDDataType", pybind11::arithmetic(), "Data type of FastDeploy.") diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc index 31c3bd6e9..86c533f6e 100755 --- a/fastdeploy/runtime.cc +++ b/fastdeploy/runtime.cc @@ -41,6 +41,10 @@ #include "fastdeploy/backends/lite/lite_backend.h" #endif +#ifdef ENABLE_RKNPU2_BACKEND +#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h" +#endif + namespace fastdeploy { std::vector GetAvailableBackends() { @@ -62,6 +66,9 @@ std::vector GetAvailableBackends() { #endif #ifdef ENABLE_LITE_BACKEND backends.push_back(Backend::LITE); +#endif +#ifdef ENABLE_RKNPU2_BACKEND + backends.push_back(Backend::RKNPU2); #endif return backends; } @@ -85,7 +92,9 @@ std::string Str(const Backend& b) { return "Backend::PDINFER"; } else if (b == Backend::POROS) { return "Backend::POROS"; - } else if (b == Backend::OPENVINO) { + } else if (b == Backend::RKNPU2) { + return "Backend::RKNPU2"; + }else if (b == Backend::OPENVINO) { return "Backend::OPENVINO"; } else if (b == Backend::LITE) { return "Backend::LITE"; @@ -98,6 +107,8 @@ std::string Str(const ModelFormat& f) { 
return "ModelFormat::PADDLE"; } else if (f == ModelFormat::ONNX) { return "ModelFormat::ONNX"; + }else if (f == ModelFormat::RKNN) { + return "ModelFormat::RKNN"; } else if (f == ModelFormat::TORCHSCRIPT) { return "ModelFormat::TORCHSCRIPT"; } @@ -113,7 +124,9 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) { out << "Backend::PDINFER"; } else if (backend == Backend::OPENVINO) { out << "Backend::OPENVINO"; - } else if (backend == Backend::POROS) { + } else if (backend == Backend::RKNPU2) { + out << "Backend::RKNPU2"; + }else if (backend == Backend::POROS) { out << "Backend::POROS"; } else if (backend == Backend::LITE) { out << "Backend::LITE"; @@ -127,6 +140,8 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) { out << "ModelFormat::PADDLE"; } else if (format == ModelFormat::ONNX) { out << "ModelFormat::ONNX"; + } else if (format == ModelFormat::RKNN) { + out << "ModelFormat::RKNN"; } else if (format == ModelFormat::TORCHSCRIPT) { out << "ModelFormat::TORCHSCRIPT"; } @@ -152,6 +167,14 @@ bool CheckModelFormat(const std::string& model_file, << model_file << std::endl; return false; } + } else if (model_format == ModelFormat::RKNN) { + if (model_file.size() < 5 || + model_file.substr(model_file.size() - 5, 5) != ".rknn") { + FDERROR << "With model format of ModelFormat::RKNN, the model file " + "should ends with `.rknn`, but now it's " + << model_file << std::endl; + return false; + } } else if (model_format == ModelFormat::TORCHSCRIPT) { if (model_file.size() < 3 || model_file.substr(model_file.size() - 3, 3) != ".pt") { @@ -162,7 +185,7 @@ bool CheckModelFormat(const std::string& model_file, } } else { FDERROR << "Only support model format with frontend ModelFormat::PADDLE / " - "ModelFormat::ONNX / ModelFormat::TORCHSCRIPT." + "ModelFormat::ONNX / ModelFormat::RKNN / ModelFormat::TORCHSCRIPT." << std::endl; return false; } @@ -182,6 +205,10 @@ ModelFormat GuessModelFormat(const std::string& model_file) { model_file.substr(model_file.size() - 3, 3) == ".pt") { FDINFO << "Model Format: Torchscript." << std::endl; return ModelFormat::TORCHSCRIPT; + } else if (model_file.size() > 5 && + model_file.substr(model_file.size() - 5, 5) == ".rknn") { + FDINFO << "Model Format: RKNN." 
+           << std::endl;
+    return ModelFormat::RKNN;
   }
 
   FDERROR << "Cannot guess which model format you are using, please set "
@@ -223,6 +250,13 @@
 void RuntimeOption::UseCpu() { device = Device::CPU; }
 
+void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
+                              fastdeploy::rknpu2::CoreMask rknpu2_core) {
+  rknpu2_cpu_name_ = rknpu2_name;
+  rknpu2_core_mask_ = rknpu2_core;
+  device = Device::RKNPU;
+}
+
 void RuntimeOption::SetExternalStream(void* external_stream) {
   external_stream_ = external_stream;
 }
@@ -234,7 +268,8 @@
 void RuntimeOption::SetOrtGraphOptLevel(int level) {
   std::vector<int> supported_level{-1, 0, 1, 2};
-  auto valid_level = std::find(supported_level.begin(), supported_level.end(), level) != supported_level.end();
+  auto valid_level = std::find(supported_level.begin(), supported_level.end(),
+                               level) != supported_level.end();
   FDASSERT(valid_level, "The level must be -1, 0, 1, 2.");
   ort_graph_opt_level = level;
 }
@@ -321,18 +356,16 @@
 void RuntimeOption::EnableLiteFP16() { lite_enable_fp16 = true; }
 
-void RuntimeOption::DisableLiteFP16() { 
-  lite_enable_fp16 = false; 
+void RuntimeOption::DisableLiteFP16() {
+  lite_enable_fp16 = false;
 }
-
 void RuntimeOption::EnableLiteInt8() { lite_enable_int8 = true; }
 
-void RuntimeOption::DisableLiteInt8() { 
-  lite_enable_int8 = false; 
+void RuntimeOption::DisableLiteInt8() {
+  lite_enable_int8 = false;
 }
-
 void RuntimeOption::SetLitePowerMode(LitePowerMode mode) {
   lite_power_mode = mode;
 }
@@ -406,7 +439,7 @@ bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                 "ENABLE_POROS_BACKEND=ON.");
 #endif
   return true;
-} 
+}
 
 void RuntimeOption::EnablePaddleTrtCollectShape() {
   pd_collect_shape = true;
@@ -454,6 +487,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
       option.backend = Backend::POROS;
     } else if (IsBackendAvailable(Backend::OPENVINO)) {
       option.backend = Backend::OPENVINO;
+    } else if (IsBackendAvailable(Backend::RKNPU2)) {
+      option.backend = Backend::RKNPU2;
     } else {
       FDERROR << "Please define backend in RuntimeOption, current it's "
                  "Backend::UNKNOWN."
@@ -506,6 +541,13 @@
     CreateLiteBackend();
     FDINFO << "Runtime initialized with Backend::LITE in "
            << Str(option.device) << "." << std::endl;
+  } else if (option.backend == Backend::RKNPU2) {
+    FDASSERT(option.device == Device::RKNPU,
+             "Backend::RKNPU2 only supports Device::RKNPU");
+    CreateRKNPU2Backend();
+
+    FDINFO << "Runtime initialized with Backend::RKNPU2 in "
+           << Str(option.device) << "."
+           << std::endl;
   } else {
     FDERROR << "Runtime only support "
                "Backend::ORT/Backend::TRT/Backend::PDINFER/Backend::POROS as "
@@ -720,4 +762,21 @@ void Runtime::CreateLiteBackend() {
 #endif
 }
 
+void Runtime::CreateRKNPU2Backend() {
+#ifdef ENABLE_RKNPU2_BACKEND
+  auto rknpu2_option = RKNPU2BackendOption();
+  rknpu2_option.cpu_name = option.rknpu2_cpu_name_;
+  rknpu2_option.core_mask = option.rknpu2_core_mask_;
+  FDASSERT(option.model_format == ModelFormat::RKNN,
+           "RKNPU2Backend only supports model format of ModelFormat::RKNN");
+  backend_ = utils::make_unique<RKNPU2Backend>();
+  auto casted_backend = dynamic_cast<RKNPU2Backend*>(backend_.get());
+  FDASSERT(casted_backend->InitFromRKNN(option.model_file, rknpu2_option),
+           "Load model from RKNN file failed while initializing RKNPU2Backend.");
+#else
+  FDASSERT(false, "RKNPU2Backend is not available, please compile with "
+                  "ENABLE_RKNPU2_BACKEND=ON.");
+#endif
+}
+
 }  // namespace fastdeploy
diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h
index 3d432909b..634c1f633 100755
--- a/fastdeploy/runtime.h
+++ b/fastdeploy/runtime.h
@@ -20,12 +20,13 @@
 
 #pragma once
 
+#include <algorithm>
 #include <map>
 #include <vector>
-#include <algorithm>
 
 #include "fastdeploy/backends/backend.h"
 #include "fastdeploy/utils/perf.h"
+#include "backends/rknpu/rknpu2/rknpu2_config.h"
 
 /** \brief All C++ FastDeploy APIs are defined inside this namespace
 *
@@ -34,21 +35,23 @@ namespace fastdeploy {
 
 /*! Inference backend supported in FastDeploy */
 enum Backend {
-  UNKNOWN,  ///< Unknown inference backend
-  ORT,  ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
-  TRT,  ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
-  PDINFER,  ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
-  POROS,  ///< Poros, support TorchScript format model, CPU / Nvidia GPU
-  OPENVINO,  ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
-  LITE,  ///< Paddle Lite, support Paddle format model, ARM CPU only
+  UNKNOWN,   ///< Unknown inference backend
+  ORT,       ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
+  TRT,       ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
+  PDINFER,   ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
+  POROS,     ///< Poros, support TorchScript format model, CPU / Nvidia GPU
+  OPENVINO,  ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
+  LITE,      ///< Paddle Lite, support Paddle format model, ARM CPU only
+  RKNPU2,    ///< RKNPU2, support RKNN format model, Rockchip NPU only
 };
 
 /*! Deep learning model format */
 enum ModelFormat {
-  AUTOREC,  ///< Auto recognize the model format by model file name
-  PADDLE,  ///< Model with paddlepaddle format
-  ONNX,  ///< Model with ONNX format
-  TORCHSCRIPT,  ///< Model with TorchScript format
+  AUTOREC,      ///< Auto recognize the model format by model file name
+  PADDLE,       ///< Model with paddlepaddle format
+  ONNX,         ///< Model with ONNX format
+  RKNN,         ///< Model with RKNN format
+  TORCHSCRIPT,  ///< Model with TorchScript format
 };
 
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
@@ -58,12 +61,12 @@ FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
 
 /*! Paddle Lite power mode for mobile device. */
 enum LitePowerMode {
-  LITE_POWER_HIGH = 0,  ///< Use Lite Backend with high power mode
-  LITE_POWER_LOW = 1,  ///< Use Lite Backend with low power mode
-  LITE_POWER_FULL = 2,  ///< Use Lite Backend with full power mode
-  LITE_POWER_NO_BIND = 3,  ///< Use Lite Backend with no bind power mode
-  LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high mode
-  LITE_POWER_RAND_LOW = 5  ///< Use Lite Backend with rand low power mode
+  LITE_POWER_HIGH = 0,       ///< Use Lite Backend with high power mode
+  LITE_POWER_LOW = 1,        ///< Use Lite Backend with low power mode
+  LITE_POWER_FULL = 2,       ///< Use Lite Backend with full power mode
+  LITE_POWER_NO_BIND = 3,    ///< Use Lite Backend with no bind power mode
+  LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high mode
+  LITE_POWER_RAND_LOW = 5    ///< Use Lite Backend with rand low power mode
 };
 
 FASTDEPLOY_DECL std::string Str(const Backend& b);
@@ -86,7 +89,7 @@ ModelFormat GuessModelFormat(const std::string& model_file);
 /*! @brief Option object used when create a new Runtime object */
 struct FASTDEPLOY_DECL RuntimeOption {
-  /** \brief Set path of model file and parameter file 
+  /** \brief Set path of model file and parameter file
    *
    * \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model
    * \param[in] params_path Path of parameter file, this only used when the model format is Paddle, e.g Resnet50/model.pdiparams
@@ -102,6 +105,9 @@
   /// Use Nvidia GPU to inference
   void UseGpu(int gpu_id = 0);
 
+  /// Use Rockchip NPU to inference with the RKNPU2 backend
+  void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name = fastdeploy::rknpu2::CpuName::RK3588,
+                 fastdeploy::rknpu2::CoreMask rknpu2_core = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
+
   void SetExternalStream(void* external_stream);
 
   /*
@@ -173,14 +179,14 @@
    */
   void DisableLiteFP16();
 
-  /** 
-   * @brief enable int8 precision while use paddle lite backend 
-   */
+  /**
+   * @brief enable int8 precision while use paddle lite backend
+   */
   void EnableLiteInt8();
 
   /**
-   * @brief disable int8 precision, change to full precision(float32) 
-   */
+   * @brief disable int8 precision, change to full precision(float32)
+   */
   void DisableLiteInt8();
 
   /**
@@ -325,9 +331,13 @@
   int unconst_ops_thres = -1;
   std::string poros_file = "";
 
-  std::string model_file = "";   // Path of model file
-  std::string params_file = "";  // Path of parameters file, can be empty
-  ModelFormat model_format = ModelFormat::AUTOREC;  // format of input model
+  // ====== Only for RKNPU2 Backend ======
+  fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ = fastdeploy::rknpu2::CpuName::RK3588;
+  fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
+
+  std::string model_file = "";   // Path of model file
+  std::string params_file = "";  // Path of parameters file, can be empty
+  ModelFormat model_format = ModelFormat::AUTOREC;  // format of input model
 
   // inside parameters, only for inside usage
   // remove multiclass_nms in Paddle2ONNX
@@ -388,6 +398,7 @@ struct FASTDEPLOY_DECL Runtime {
   void CreateTrtBackend();
   void CreateOpenVINOBackend();
   void CreateLiteBackend();
+  void CreateRKNPU2Backend();
 
   std::unique_ptr<BaseBackend> backend_;
 };
-}  // namespace fastdeploy 
+}  // namespace fastdeploy
diff --git a/fastdeploy/vision/segmentation/ppseg/model.cc b/fastdeploy/vision/segmentation/ppseg/model.cc
index 222f30d2f..3c6318232 100755
--- a/fastdeploy/vision/segmentation/ppseg/model.cc
+++ b/fastdeploy/vision/segmentation/ppseg/model.cc
@@ -28,6 +28,7 @@ PaddleSegModel::PaddleSegModel(const std::string& model_file,
   config_file_ = config_file;
   valid_cpu_backends = {Backend::OPENVINO, Backend::PDINFER, Backend::ORT, Backend::LITE};
   valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
+  valid_rknpu_backends = {Backend::RKNPU2};
   runtime_option = custom_option;
   runtime_option.model_format = model_format;
   runtime_option.model_file = model_file;
@@ -67,16 +68,17 @@ bool PaddleSegModel::BuildPreprocessPipelineFromConfig() {
     FDASSERT(op.IsMap(),
              "Require the transform information in yaml be Map type.");
     if (op["type"].as<std::string>() == "Normalize") {
-      std::vector<float> mean = {0.5, 0.5, 0.5};
-      std::vector<float> std = {0.5, 0.5, 0.5};
-      if (op["mean"]) {
-        mean = op["mean"].as<std::vector<float>>();
+      if (!(this->disable_normalize_and_permute)) {
+        std::vector<float> mean = {0.5, 0.5, 0.5};
+        std::vector<float> std = {0.5, 0.5, 0.5};
+        if (op["mean"]) {
+          mean = op["mean"].as<std::vector<float>>();
+        }
+        if (op["std"]) {
+          std = op["std"].as<std::vector<float>>();
+        }
+        processors_.push_back(std::make_shared<Normalize>(mean, std));
       }
-      if (op["std"]) {
-        std = op["std"].as<std::vector<float>>();
-      }
-      processors_.push_back(std::make_shared<Normalize>(mean, std));
-
     } else if (op["type"].as<std::string>() == "Resize") {
       yml_contain_resize_op = true;
       const auto& target_size = op["target_size"];
@@ -101,7 +103,7 @@ bool PaddleSegModel::BuildPreprocessPipelineFromConfig() {
   if (input_height == -1 || input_width == -1) {
     FDWARNING << "The exported PaddleSeg model is with dynamic shape input, "
               << "which is not supported by ONNX Runtime and Tensorrt. "
-              << "Only OpenVINO and Paddle Inference are available now. " 
+              << "Only OpenVINO and Paddle Inference are available now. "
              << "For using ONNX Runtime or Tensorrt, "
              << "Please refer to https://github.com/PaddlePaddle/PaddleSeg/blob/develop/docs/model_export.md"
              << " to export model with fixed input shape."
@@ -130,7 +132,9 @@
               << "." << std::endl;
     }
   }
-  processors_.push_back(std::make_shared<HWC2CHW>());
+  if (!(this->disable_normalize_and_permute)) {
+    processors_.push_back(std::make_shared<HWC2CHW>());
+  }
   return true;
 }
@@ -357,6 +361,14 @@ bool PaddleSegModel::Predict(cv::Mat* im, SegmentationResult* result) {
   return true;
 }
 
+void PaddleSegModel::DisableNormalizeAndPermute() {
+  this->disable_normalize_and_permute = true;
+  // Rebuild the preprocess pipeline: the config file may already have been
+  // loaded, in which case the pipeline still contains Normalize and HWC2CHW
+  // and this switch would otherwise take no effect.
+  if (!BuildPreprocessPipelineFromConfig()) {
+    FDERROR << "Failed to build preprocess pipeline from configuration file." << std::endl;
+  }
+}
+
 }  // namespace segmentation
 }  // namespace vision
 }  // namespace fastdeploy
diff --git a/fastdeploy/vision/segmentation/ppseg/model.h b/fastdeploy/vision/segmentation/ppseg/model.h
index 1ae8b9b24..b55ef91c1 100644
--- a/fastdeploy/vision/segmentation/ppseg/model.h
+++ b/fastdeploy/vision/segmentation/ppseg/model.h
@@ -60,6 +60,9 @@
    */
   bool is_vertical_screen = false;
 
+  /// This function will disable Normalize and HWC2CHW in the preprocessing step.
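+  /// Typically used when the normalization (and layout conversion) has
+  /// already been folded into the model itself, e.g. by tools/rknpu2/export.py.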
+  void DisableNormalizeAndPermute();
 
  private:
   bool Initialize();
@@ -76,6 +79,9 @@
   std::vector<std::shared_ptr<Processor>> processors_;
   std::string config_file_;
+
+  // Records whether Normalize and HWC2CHW have been disabled in preprocessing.
+  bool disable_normalize_and_permute = false;
 };
 
 }  // namespace segmentation
diff --git a/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc b/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc
index 51bec778f..ea7a28ae8 100644
--- a/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc
+++ b/fastdeploy/vision/segmentation/ppseg/ppseg_pybind.cc
@@ -27,6 +27,7 @@ void BindPPSeg(pybind11::module& m) {
              self.Predict(&mat, res);
              return res;
            })
+      .def("disable_normalize_and_permute", &vision::segmentation::PaddleSegModel::DisableNormalizeAndPermute)
       .def_readwrite("apply_softmax",
                      &vision::segmentation::PaddleSegModel::apply_softmax)
       .def_readwrite("is_vertical_screen",
diff --git a/python/fastdeploy/__init__.py b/python/fastdeploy/__init__.py
index e8a84d285..c477fcec0 100644
--- a/python/fastdeploy/__init__.py
+++ b/python/fastdeploy/__init__.py
@@ -16,10 +16,11 @@ import logging
 import os
 import sys
 
-from .c_lib_wrap import (ModelFormat, Backend, FDDataType, TensorInfo, Device,
+from .c_lib_wrap import (ModelFormat, Backend, rknpu2,
+                         FDDataType, TensorInfo, Device,
                          FDTensor, is_built_with_gpu, is_built_with_ort,
                          ModelFormat, is_built_with_paddle, is_built_with_trt,
                          get_default_cuda_directory)
 
 from .runtime import Runtime, RuntimeOption
 from .model import FastDeployModel
diff --git a/python/fastdeploy/runtime.py b/python/fastdeploy/runtime.py
index 69e45462e..e8a6058a4 100755
--- a/python/fastdeploy/runtime.py
+++ b/python/fastdeploy/runtime.py
@@ -16,7 +16,7 @@ import logging
 import numpy as np
 from . import ModelFormat
 from . import c_lib_wrap as C
+from . import rknpu2
 
 class Runtime:
     """FastDeploy Runtime object.
@@ -207,6 +207,11 @@
         """
         return self._option.use_cpu()
 
+    def use_rknpu2(self,
+                   rknpu2_name=rknpu2.CpuName.RK3588,
+                   rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0):
+        """Inference with RKNPU2 (Rockchip NPU)
+        """
+        return self._option.use_rknpu2(rknpu2_name, rknpu2_core)
+
     def set_cpu_thread_num(self, thread_num=-1):
         """Set number of threads if inference with CPU
diff --git a/python/fastdeploy/vision/segmentation/ppseg/__init__.py b/python/fastdeploy/vision/segmentation/ppseg/__init__.py
index c7d2de3d1..37e1a6017 100644
--- a/python/fastdeploy/vision/segmentation/ppseg/__init__.py
+++ b/python/fastdeploy/vision/segmentation/ppseg/__init__.py
@@ -35,7 +35,7 @@ class PaddleSegModel(FastDeployModel):
         """
         super(PaddleSegModel, self).__init__(runtime_option)
 
-        assert model_format == ModelFormat.PADDLE, "PaddleSeg only support model format of ModelFormat.Paddle now."
+        # assert model_format == ModelFormat.PADDLE, "PaddleSeg only support model format of ModelFormat.Paddle now."
         self._model = C.vision.segmentation.PaddleSegModel(
             model_file, params_file, config_file, self._runtime_option,
             model_format)
@@ -49,6 +49,9 @@
         """
         return self._model.predict(input_image)
 
+    def disable_normalize_and_permute(self):
+        return self._model.disable_normalize_and_permute()
+
     @property
     def apply_softmax(self):
         """Atrribute of PaddleSeg model.
        Stating Whether applying softmax operator in the postprocess, default value is False
diff --git a/python/setup.py b/python/setup.py
index 8b56328f5..10f57a2cb 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -48,6 +48,8 @@ with open(os.path.join(TOP_DIR, "python", "requirements.txt")) as fin:
 setup_configs = dict()
 setup_configs["ENABLE_PADDLE_FRONTEND"] = os.getenv("ENABLE_PADDLE_FRONTEND",
                                                     "ON")
+setup_configs["ENABLE_RKNPU2_BACKEND"] = os.getenv("ENABLE_RKNPU2_BACKEND",
+                                                   "OFF")
 setup_configs["ENABLE_ORT_BACKEND"] = os.getenv("ENABLE_ORT_BACKEND", "OFF")
 setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND",
                                                      "OFF")
@@ -69,6 +71,7 @@
 setup_configs["LIBRARY_NAME"] = PACKAGE_NAME
 setup_configs["PY_LIBRARY_NAME"] = PACKAGE_NAME + "_main"
 setup_configs["OPENCV_DIRECTORY"] = os.getenv("OPENCV_DIRECTORY", "")
 setup_configs["ORT_DIRECTORY"] = os.getenv("ORT_DIRECTORY", "")
+setup_configs["RKNN2_TARGET_SOC"] = os.getenv("RKNN2_TARGET_SOC", "")
 
 if setup_configs["WITH_GPU"] == "ON" or setup_configs[
         "BUILD_ON_JETSON"] == "ON":
diff --git a/tools/rknpu2/config/ppseg_config.yaml b/tools/rknpu2/config/ppseg_config.yaml
new file mode 100644
index 000000000..a029bdb95
--- /dev/null
+++ b/tools/rknpu2/config/ppseg_config.yaml
@@ -0,0 +1,7 @@
+model_path: ./portrait_pp_humansegv2_lite_256x144_pretrained.onnx
+output_folder: ./
+target_platform: RK3588
+normalize:
+  mean: [0.5, 0.5, 0.5]
+  std: [0.5, 0.5, 0.5]
+outputs: None
diff --git a/tools/rknpu2/export.py b/tools/rknpu2/export.py
new file mode 100644
index 000000000..12dfe18c9
--- /dev/null
+++ b/tools/rknpu2/export.py
@@ -0,0 +1,71 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
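+#
+# Typical usage (run from tools/rknpu2/, with the sample config added in
+# this patch; any config following the same schema works):
+#   python export.py --config_path config/ppseg_config.yaml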
+import os
+import yaml
+import argparse
+from rknn.api import RKNN
+
+
+def get_config():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--verbose", default=True, help="rknntoolkit verbose")
+    parser.add_argument("--config_path")
+    args = parser.parse_args()
+    return args
+
+
+if __name__ == "__main__":
+    config = get_config()
+    with open(config.config_path) as file:
+        file_data = file.read()
+        yaml_config = yaml.safe_load(file_data)
+    print(yaml_config)
+    model = RKNN(config.verbose)
+
+    # Config
+    mean_values = [[255 * mean for mean in yaml_config["normalize"]["mean"]]]
+    std_values = [[255 * std for std in yaml_config["normalize"]["std"]]]
+    model.config(mean_values=mean_values,
+                 std_values=std_values,
+                 target_platform=yaml_config["target_platform"])
+
+    # Load ONNX model
+    print(type(yaml_config["outputs"]))
+    print("yaml_config[\"outputs\"] = ", yaml_config["outputs"])
+    if yaml_config["outputs"] == "None":
+        ret = model.load_onnx(model=yaml_config["model_path"])
+    else:
+        ret = model.load_onnx(model=yaml_config["model_path"],
+                              outputs=yaml_config["outputs"])
+    assert ret == 0, "Load model failed!"
+
+    # Build model
+    ret = model.build(do_quantization=None)
+    assert ret == 0, "Build model failed!"
+
+    # Init Runtime
+    ret = model.init_runtime()
+    assert ret == 0, "Init runtime environment failed!"
+
+    # Export
+    if not os.path.exists(yaml_config["output_folder"]):
+        os.mkdir(yaml_config["output_folder"])
+
+    model_base_name = os.path.basename(yaml_config["model_path"]).split(".")[0]
+    model_device_name = yaml_config["target_platform"].lower()
+    model_save_name = model_base_name + "_" + model_device_name + ".rknn"
+    ret = model.export_rknn(
+        os.path.join(yaml_config["output_folder"], model_save_name))
+    assert ret == 0, "Export rknn model failed!"
+    print("Export OK!")
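
-- 
A minimal end-to-end C++ sketch of the API introduced by this patch, for
reference only and not part of the diff: the model, config and image paths
are placeholders, and the `.rknn` file is assumed to come from
tools/rknpu2/export.py with the sample ppseg config above.

#include <iostream>
#include "fastdeploy/vision.h"

int main() {
  fastdeploy::RuntimeOption option;
  // Target RK3588 and pin inference to NPU core 0.
  option.UseRKNPU2(fastdeploy::rknpu2::CpuName::RK3588,
                   fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);

  // PaddleSeg model exported to RKNN; params_file stays empty for RKNN models.
  auto model = fastdeploy::vision::segmentation::PaddleSegModel(
      "./portrait_pp_humansegv2_lite_256x144_pretrained_rk3588.rknn", "",
      "./deploy.yaml", option, fastdeploy::ModelFormat::RKNN);

  // Normalize was folded into the RKNN model at export time, so skip
  // Normalize/HWC2CHW in the host-side preprocess pipeline.
  model.DisableNormalizeAndPermute();

  cv::Mat im = cv::imread("./test.jpg");
  fastdeploy::vision::SegmentationResult result;
  if (!model.Predict(&im, &result)) {
    std::cerr << "Failed to predict." << std::endl;
    return -1;
  }
  std::cout << result.Str() << std::endl;
  return 0;
}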