diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c2c4707d..9fb46fd4b 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,10 +39,11 @@ if(NOT MSVC)
   set(CMAKE_CXX_FLAGS "-Wno-format")
 endif(NOT MSVC)
 
-if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
+if(UNIX AND (NOT APPLE) AND (NOT ANDROID) AND (NOT ENABLE_TIMVX))
   include(${PROJECT_SOURCE_DIR}/cmake/patchelf.cmake)
 endif()
 
+
 if(ANDROID)
   # To reduce the volume of the library
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g0 -Os -ffunction-sections -fdata-sections")
@@ -69,6 +70,8 @@ option(WITH_LITE_STATIC "Use Paddle Lite static lib for Android." OFF)
 option(WITH_LITE_FULL_API "Use Paddle Lite full API lib for Android." ON)
 option(WITH_LITE_FP16 "Use Paddle Lite lib with fp16 enabled for Android." OFF)
 
+option(ENABLE_TIMVX "Whether to compile for TIMVX deploy." OFF)
+
 # Please don't open this flag now, some bugs exists.
 # Only support Linux Now
 # option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF)
@@ -136,6 +139,11 @@ set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}")
 include_directories(${HEAD_DIR})
 include_directories(${CMAKE_CURRENT_BINARY_DIR})
 
+if (ENABLE_TIMVX)
+  include(${PROJECT_SOURCE_DIR}/cmake/timvx.cmake)
+endif()
+
+
 if(ANDROID OR IOS)
   if(ENABLE_ORT_BACKEND)
     message(FATAL_ERROR "Not support ONNXRuntime backend for Andorid/IOS now. Please set ENABLE_ORT_BACKEND=OFF.")
@@ -562,6 +570,7 @@ install(
   DESTINATION ${CMAKE_INSTALL_PREFIX}
 )
 
+
 install(
   DIRECTORY ${PROJECT_SOURCE_DIR}/examples
   DESTINATION ${CMAKE_INSTALL_PREFIX}
diff --git a/cmake/opencv.cmake b/cmake/opencv.cmake
old mode 100644
new mode 100755
index 90ecfa095..485936de5
--- a/cmake/opencv.cmake
+++ b/cmake/opencv.cmake
@@ -41,6 +41,8 @@ elseif(IOS)
 else()
   if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
     set(OPENCV_FILENAME "opencv-linux-aarch64-3.4.14")
+  elseif(TARGET_ABI MATCHES "armhf")
+    set(OPENCV_FILENAME "opencv-armv7hf")
   else()
     set(OPENCV_FILENAME "opencv-linux-x64-3.4.16")
   endif()
@@ -55,6 +57,8 @@ endif()
 set(OPENCV_INSTALL_DIR ${THIRD_PARTY_PATH}/install/)
 if(ANDROID)
   set(OPENCV_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs")
+elseif(TARGET_ABI MATCHES "armhf")
+  set(OPENCV_URL_PREFIX "https://bj.bcebos.com/fastdeploy/test")
 else() # TODO: use fastdeploy/third_libs instead.
set(OPENCV_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs") endif() @@ -181,6 +185,9 @@ else() file(RENAME ${THIRD_PARTY_PATH}/install/${OPENCV_FILENAME}/ ${THIRD_PARTY_PATH}/install/opencv) set(OPENCV_FILENAME opencv) set(OpenCV_DIR ${THIRD_PARTY_PATH}/install/${OPENCV_FILENAME}) + if(TARGET_ABI MATCHES "armhf") + set(OpenCV_DIR ${OpenCV_DIR}/lib/cmake/opencv4) + endif() if (WIN32) set(OpenCV_DIR ${OpenCV_DIR}/build) endif() diff --git a/cmake/paddlelite.cmake b/cmake/paddlelite.cmake old mode 100644 new mode 100755 index 1f807bfce..5eabd59c1 --- a/cmake/paddlelite.cmake +++ b/cmake/paddlelite.cmake @@ -66,6 +66,8 @@ elseif(ANDROID) else() # Linux if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64") set(PADDLELITE_URL "${PADDLELITE_URL_PREFIX}/lite-linux-arm64-20220920.tgz") + elseif(TARGET_ABI MATCHES "armhf") + set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/test/lite-linux_armhf_1101.tgz") else() message(FATAL_ERROR "Only support Linux aarch64 now, x64 is not supported with backend Paddle Lite.") endif() diff --git a/cmake/timvx.cmake b/cmake/timvx.cmake new file mode 100755 index 000000000..153f0de19 --- /dev/null +++ b/cmake/timvx.cmake @@ -0,0 +1,55 @@ +if (NOT DEFINED TARGET_ABI) + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR arm) + set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc") + set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++") + set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}") + set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}" ) + set(TARGET_ABI armhf) + set(CMAKE_BUILD_TYPE MinSizeRel) +else() + if(NOT ${ENABLE_LITE_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_LITE_BACKEND=ON") + set(ENABLE_LITE_BACKEND ON) + endif() + if(${ENABLE_PADDLE_FRONTEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE_FRONTEND=OFF") + set(ENABLE_PADDLE_FRONTEND OFF) + endif() + if(${ENABLE_ORT_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_ORT_BACKEND=OFF") + set(ENABLE_ORT_BACKEND OFF) + endif() + if(${ENABLE_PADDLE_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_PADDLE_BACKEND=OFF") + set(ENABLE_PADDLE_BACKEND OFF) + endif() + if(${ENABLE_OPENVINO_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_OPENVINO_BACKEND=OFF") + set(ENABLE_OPENVINO_BACKEND OFF) + endif() + if(${ENABLE_TRT_BACKEND}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_TRT_BACKEND=OFF") + set(ENABLE_TRT_BACKEND OFF) + endif() + + if(${WITH_GPU}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DWITH_GPU=OFF") + set(WITH_GPU OFF) + endif() + + if(${ENABLE_OPENCV_CUDA}) + message(WARNING "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_OPENCV_CUDA=OFF") + set(ENABLE_OPENCV_CUDA OFF) + endif() + + if(${ENABLE_TEXT}) + set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE) + message(STATUS "While compiling with -DWITH_TIMVX=ON, will force to set -DENABLE_TEXT=OFF") + endif() + if (DEFINED CMAKE_INSTALL_PREFIX) + install(FILES ${PROJECT_SOURCE_DIR}/cmake/timvx.cmake DESTINATION ${CMAKE_INSTALL_PREFIX}) + endif() +endif() + + diff --git a/docs/cn/build_and_install/README.md b/docs/cn/build_and_install/README.md old mode 100644 new mode 100755 index 43b98602d..e45ae819e --- a/docs/cn/build_and_install/README.md +++ 
diff --git a/docs/cn/build_and_install/README.md b/docs/cn/build_and_install/README.md
old mode 100644
new mode 100755
index 43b98602d..e45ae819e
--- a/docs/cn/build_and_install/README.md
+++ b/docs/cn/build_and_install/README.md
@@ -5,6 +5,7 @@
 - [CPU部署环境编译安装](cpu.md)
 - [Jetson部署环境编译安装](jetson.md)
 - [Android平台部署环境编译安装](android.md)
+- [Build and install the Rockchip RK1126 deployment environment](rk1126.md)
 
 ## FastDeploy编译选项说明
 
diff --git a/docs/cn/build_and_install/rk1126.md b/docs/cn/build_and_install/rk1126.md
new file mode 100755
index 000000000..4eda01981
--- /dev/null
+++ b/docs/cn/build_and_install/rk1126.md
@@ -0,0 +1,63 @@
+# Build and Install the FastDeploy Library for Rockchip RK1126
+
+FastDeploy supports deployment and inference on Rockchip SoCs through the Paddle-Lite backend.
+For more details, please refer to: [Paddle Lite deployment example](https://paddle-lite.readthedocs.io/zh/develop/demo_guides/verisilicon_timvx.html).
+
+This document describes how to cross-compile the Paddle Lite based C++ FastDeploy library.
+
+The relevant build options are described below:
+|Build option|Default|Description|Remark|
+|:---|:---|:---|:---|
+|ENABLE_LITE_BACKEND|OFF|Must be set to ON when building the RK library| - |
+
+For more build options, please refer to the [FastDeploy build options](./README.md)
+
+## Set up the cross-compilation environment
+
+### Host environment requirements
+- os: Ubuntu == 16.04
+- cmake: version >= 3.10.0
+
+### Environment setup
+```bash
+# 1. Install basic software
+apt update
+apt-get install -y --no-install-recommends \
+  gcc g++ git make wget python unzip
+
+# 2. Install arm gcc toolchains
+apt-get install -y --no-install-recommends \
+  g++-arm-linux-gnueabi gcc-arm-linux-gnueabi \
+  g++-arm-linux-gnueabihf gcc-arm-linux-gnueabihf \
+  gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
+
+# 3. Install cmake 3.10 or above
+wget -c https://mms-res.cdn.bcebos.com/cmake-3.10.3-Linux-x86_64.tar.gz && \
+  tar xzf cmake-3.10.3-Linux-x86_64.tar.gz && \
+  mv cmake-3.10.3-Linux-x86_64 /opt/cmake-3.10 && \
+  ln -s /opt/cmake-3.10/bin/cmake /usr/bin/cmake && \
+  ln -s /opt/cmake-3.10/bin/ccmake /usr/bin/ccmake
+```
+
+## Cross-compile the FastDeploy library based on Paddle Lite
+After the cross-compilation environment is set up, build as follows:
+```bash
+# Download the latest source code
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+mkdir build && cd build
+
+# CMake configuration with RK toolchain (-DENABLE_VISION=ON optionally builds the vision deployment module)
+cmake -DCMAKE_TOOLCHAIN_FILE=./../cmake/timvx.cmake \
+      -DENABLE_TIMVX=ON \
+      -DCMAKE_INSTALL_PREFIX=fastdeploy-timvx \
+      -DENABLE_VISION=ON \
+      -Wno-dev ..
+
+# Build FastDeploy RK1126 C++ SDK
+make -j8
+make install
+```
+After the build finishes, the fastdeploy-timvx directory is generated, which contains the Paddle Lite TIM-VX based FastDeploy library.
+
+To deploy PaddleClas classification models on the RK1126, please refer to: [PaddleClas C++ deployment example for the RK1126 board](../../../examples/vision/classification/paddleclas/rk1126/README.md)
diff --git a/examples/vision/classification/paddleclas/rk1126/README.md b/examples/vision/classification/paddleclas/rk1126/README.md
new file mode 100755
index 000000000..bac6d7bf8
--- /dev/null
+++ b/examples/vision/classification/paddleclas/rk1126/README.md
@@ -0,0 +1,11 @@
+# Deploy Quantized PaddleClas Models on the RK1126
+FastDeploy now supports deploying quantized PaddleClas models to the RK1126 based on Paddle Lite.
+
+For model quantization and for downloading quantized models, please refer to: [Model quantization](../quantize/README.md)
+
+
+## Detailed deployment documents
+
+Only C++ deployment is supported on the RK1126.
+
+- [C++ deployment](cpp)
diff --git a/examples/vision/classification/paddleclas/rk1126/cpp/CMakeLists.txt b/examples/vision/classification/paddleclas/rk1126/cpp/CMakeLists.txt
new file mode 100755
index 000000000..baaf8331f
--- /dev/null
+++ b/examples/vision/classification/paddleclas/rk1126/cpp/CMakeLists.txt
@@ -0,0 +1,38 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+# Path of the downloaded and extracted FastDeploy SDK
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+# Add the FastDeploy header dependencies
+include_directories(${FASTDEPLOY_INCS})
+include_directories(${FastDeploy_INCLUDE_DIRS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+# Link against the FastDeploy library
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+
+set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
+
+install(TARGETS infer_demo DESTINATION ./)
+
+install(DIRECTORY models DESTINATION ./)
+install(DIRECTORY images DESTINATION ./)
+# install(DIRECTORY run_with_adb.sh DESTINATION ./)
+
+file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*)
+install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib)
+
+file(GLOB OPENCV_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib/lib*)
+install(PROGRAMS ${OPENCV_LIBS} DESTINATION lib)
+
+file(GLOB PADDLELITE_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddlelite/lib/lib*)
+install(PROGRAMS ${PADDLELITE_LIBS} DESTINATION lib)
+
+file(GLOB TIMVX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddlelite/lib/verisilicon_timvx/*)
+install(PROGRAMS ${TIMVX_LIBS} DESTINATION lib)
+
+file(GLOB ADB_TOOLS run_with_adb.sh)
+install(PROGRAMS ${ADB_TOOLS} DESTINATION ./)
diff --git a/examples/vision/classification/paddleclas/rk1126/cpp/README.md b/examples/vision/classification/paddleclas/rk1126/cpp/README.md
new file mode 100755
index 000000000..6d8ecd151
--- /dev/null
+++ b/examples/vision/classification/paddleclas/rk1126/cpp/README.md
@@ -0,0 +1,53 @@
+# PaddleClas C++ Deployment Example for the RK1126 Board
+The `infer.cc` provided in this directory helps users quickly deploy quantized PaddleClas models on the RK1126 with accelerated inference.
+
+## Deployment preparation
+### Prepare the FastDeploy cross-compilation environment
+- 1. For the software and hardware requirements and for setting up the cross-compilation environment, please refer to: [Prepare the FastDeploy cross-compilation environment](../../../../../../docs/cn/build_and_install/rk1126.md#set-up-the-cross-compilation-environment)
+
+### Prepare the quantized model
+- 1. Users can directly deploy the quantized models provided by FastDeploy.
+- 2. Users can also quantize a model themselves with FastDeploy's [one-click auto compression tool](../../../../../../tools/auto_compression/) and deploy the resulting quantized model. (Note: the quantized classification model still needs the inference_cls.yaml file from the FP32 model folder; a self-quantized model folder does not contain this yaml file, so copy it from the FP32 model folder into the quantized model folder.)
+- For more information about quantization, see [Model quantization](../../quantize/README.md)
+
+## Deploy the quantized ResNet50_Vd classification model on the RK1126
+Please follow the steps below to deploy the quantized ResNet50_Vd model on the RK1126:
+1. Cross-compile the FastDeploy library; for details, please refer to: [Cross-compile FastDeploy](../../../../../../docs/cn/build_and_install/rk1126.md#cross-compile-the-fastdeploy-library-based-on-paddle-lite)
+
+2. Copy the compiled library to the current directory with the following command:
+```bash
+cp -r FastDeploy/build/fastdeploy-timvx/ FastDeploy/examples/vision/classification/paddleclas/rk1126/cpp/
+```
+
+3. Download the model and example image needed for deployment into the current directory:
+```bash
+mkdir models && mkdir images
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
+tar -xvf ResNet50_vd_infer.tgz
+cp -r ResNet50_vd_infer models
+wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg
+cp -r ILSVRC2012_val_00000010.jpeg images
+```
+
+4. Build the deployment example with the following commands:
+```bash
+mkdir build && cd build
+cmake -DCMAKE_TOOLCHAIN_FILE=../fastdeploy-timvx/timvx.cmake -DFASTDEPLOY_INSTALL_DIR=fastdeploy-timvx ..
+make -j8
+make install
+# After a successful build, an install folder is generated; it contains the runnable demo and the libraries needed for deployment
+```
+
+5. Deploy the ResNet50_vd classification model to the Rockchip RV1126 with the adb tool, using the following commands:
+```bash
+# Enter the install directory
+cd FastDeploy/examples/vision/classification/paddleclas/rk1126/cpp/build/install/
+# The command below means: bash run_with_adb.sh <demo to run> <model path> <image path> <DEVICE_ID of the device>
+bash run_with_adb.sh infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg $DEVICE_ID
+```
+
+The output after a successful deployment is as follows:
+
+
+
+Please note that the model deployed on the RK1126 must be a quantized model; for model quantization, please refer to: [Model quantization](../../../../../../docs/cn/quantize.md)
diff --git a/examples/vision/classification/paddleclas/rk1126/cpp/infer.cc b/examples/vision/classification/paddleclas/rk1126/cpp/infer.cc
new file mode 100755
index 000000000..1d412776a
--- /dev/null
+++ b/examples/vision/classification/paddleclas/rk1126/cpp/infer.cc
@@ -0,0 +1,61 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <string>
+#include "fastdeploy/vision.h"
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void InitAndInfer(const std::string& model_dir, const std::string& image_file) {
+  auto model_file = model_dir + sep + "inference.pdmodel";
+  auto params_file = model_dir + sep + "inference.pdiparams";
+  auto config_file = model_dir + sep + "inference_cls.yaml";
+
+  fastdeploy::RuntimeOption option;
+  option.UseTimVX();
+
+  auto model = fastdeploy::vision::classification::PaddleClasModel(
+      model_file, params_file, config_file, option);
+
+  assert(model.Initialized());
+
+  auto im = cv::imread(image_file);
+
+  fastdeploy::vision::ClassifyResult res;
+  if (!model.Predict(&im, &res)) {
+    std::cerr << "Failed to predict." << std::endl;
+    return;
+  }
+
+  std::cout << res.Str() << std::endl;
+
+}
+
+int main(int argc, char* argv[]) {
+  if (argc < 3) {
+    std::cout << "Usage: infer_demo path/to/quant_model "
+                 "path/to/image "
+                 "run_option, "
+                 "e.g ./infer_demo ./ResNet50_vd_quant ./test.jpeg"
+              << std::endl;
+    return -1;
+  }
+
+  std::string model_dir = argv[1];
+  std::string test_image = argv[2];
+  InitAndInfer(model_dir, test_image);
+  return 0;
+}
diff --git a/examples/vision/classification/paddleclas/rk1126/cpp/run_with_adb.sh b/examples/vision/classification/paddleclas/rk1126/cpp/run_with_adb.sh
new file mode 100755
index 000000000..aacaed4c5
--- /dev/null
+++ b/examples/vision/classification/paddleclas/rk1126/cpp/run_with_adb.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+HOST_SPACE=${PWD}
+echo ${HOST_SPACE}
+WORK_SPACE=/data/local/tmp/test
+
+# The first parameter represents the demo name
+DEMO_NAME=image_classification_demo
+if [ -n "$1" ]; then
+  DEMO_NAME=$1
+fi
+
+# The second parameter represents the model name
+MODEL_NAME=mobilenet_v1_fp32_224
+if [ -n "$2" ]; then
+  MODEL_NAME=$2
+fi
+
+# The third parameter indicates the name of the image to be tested
+IMAGE_NAME=0001.jpg
+if [ -n "$3" ]; then
+  IMAGE_NAME=$3
+fi
+
+# The fourth parameter represents the ID of the device
+ADB_DEVICE_NAME=
+if [ -n "$4" ]; then
+  ADB_DEVICE_NAME="-s $4"
+fi
+
+# Set the environment variables required during the running process
+EXPORT_ENVIRONMENT_VARIABLES="export GLOG_v=5; export VIV_VX_ENABLE_GRAPH_TRANSFORM=-pcq:1; export VIV_VX_SET_PER_CHANNEL_ENTROPY=100; export TIMVX_BATCHNORM_FUSION_MAX_ALLOWED_QUANT_SCALE_DEVIATION=300000; export VSI_NN_LOG_LEVEL=5;"
+
+EXPORT_ENVIRONMENT_VARIABLES="${EXPORT_ENVIRONMENT_VARIABLES}export LD_LIBRARY_PATH=${WORK_SPACE}/lib:\$LD_LIBRARY_PATH;"
+
+# Please install adb, and DON'T run this in the docker.
+set -e
+adb $ADB_DEVICE_NAME shell "rm -rf $WORK_SPACE"
+adb $ADB_DEVICE_NAME shell "mkdir -p $WORK_SPACE"
+
+# Upload the demo, libraries, model and test images to the device
+adb $ADB_DEVICE_NAME push ${HOST_SPACE}/lib $WORK_SPACE
+adb $ADB_DEVICE_NAME push ${HOST_SPACE}/${DEMO_NAME} $WORK_SPACE
+adb $ADB_DEVICE_NAME push models $WORK_SPACE
+adb $ADB_DEVICE_NAME push images $WORK_SPACE
+
+# Execute the deployment demo
+adb $ADB_DEVICE_NAME shell "cd $WORK_SPACE; ${EXPORT_ENVIRONMENT_VARIABLES} chmod +x ./${DEMO_NAME}; ./${DEMO_NAME} ./models/${MODEL_NAME} ./images/$IMAGE_NAME"
diff --git a/fastdeploy/backends/lite/lite_backend.cc b/fastdeploy/backends/lite/lite_backend.cc
old mode 100644
new mode 100755
index a43347715..bdfad299c
--- a/fastdeploy/backends/lite/lite_backend.cc
+++ b/fastdeploy/backends/lite/lite_backend.cc
@@ -63,6 +63,24 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
       FDWARNING << "This device is not supported fp16, will skip fp16 option.";
     }
   }
+  if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
+    std::vector<char> nnadapter_subgraph_partition_config_buffer;
+    if (ReadFile(option_.nnadapter_subgraph_partition_config_path, &nnadapter_subgraph_partition_config_buffer, false)) {
+      if (!nnadapter_subgraph_partition_config_buffer.empty()) {
+        std::string nnadapter_subgraph_partition_config_string(nnadapter_subgraph_partition_config_buffer.data(), nnadapter_subgraph_partition_config_buffer.size());
+        config_.set_nnadapter_subgraph_partition_config_buffer(nnadapter_subgraph_partition_config_string);
+      }
+    }
+  }
+  if(option_.enable_timvx){
+    config_.set_nnadapter_device_names({"verisilicon_timvx"});
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
+    valid_places.push_back(
+        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
+  }
   valid_places.push_back(
       paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
   config_.set_valid_places(valid_places);
@@ -75,6 +93,30 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
   }
 }
 
+bool LiteBackend::ReadFile(const std::string& filename,
+                           std::vector<char>* contents,
+                           const bool binary) {
+  FILE *fp = fopen(filename.c_str(), binary ? "rb" : "r");
+  if (!fp){
+    FDERROR << "Cannot open file " << filename << "." << std::endl;
+    return false;
+  }
+  fseek(fp, 0, SEEK_END);
+  size_t size = ftell(fp);
+  fseek(fp, 0, SEEK_SET);
+  contents->clear();
+  contents->resize(size);
+  size_t offset = 0;
+  char *ptr = reinterpret_cast<char*>(&(contents->at(0)));
+  while (offset < size) {
+    size_t already_read = fread(ptr, 1, size - offset, fp);
+    offset += already_read;
+    ptr += already_read;
+  }
+  fclose(fp);
+  return true;
+}
+
 bool LiteBackend::InitFromPaddle(const std::string& model_file,
                                  const std::string& params_file,
                                  const LiteBackendOption& option) {
diff --git a/fastdeploy/backends/lite/lite_backend.h b/fastdeploy/backends/lite/lite_backend.h
old mode 100644
new mode 100755
index fa18cfe47..2922d4ea3
--- a/fastdeploy/backends/lite/lite_backend.h
+++ b/fastdeploy/backends/lite/lite_backend.h
@@ -43,6 +43,8 @@ struct LiteBackendOption {
   std::string optimized_model_dir = "";
   // TODO(qiuyanjun): support more options for lite backend.
   // Such as fp16, different device target (kARM/kXPU/kNPU/...)
+  std::string nnadapter_subgraph_partition_config_path = "";
+  bool enable_timvx = false;
 };
 
 // Convert data type from paddle lite to fastdeploy
@@ -77,5 +79,8 @@ class LiteBackend : public BaseBackend {
   std::map<std::string, int> inputs_order_;
   LiteBackendOption option_;
   bool supported_fp16_ = false;
+  bool ReadFile(const std::string& filename,
+                std::vector<char>* contents,
+                const bool binary = true);
 };
 } // namespace fastdeploy
diff --git a/fastdeploy/core/fd_type.cc b/fastdeploy/core/fd_type.cc
old mode 100644
new mode 100755
index 5712bb278..ba3319dbc
--- a/fastdeploy/core/fd_type.cc
+++ b/fastdeploy/core/fd_type.cc
@@ -59,6 +59,9 @@ std::string Str(const Device& d) {
     case Device::IPU:
      out = "Device::IPU";
      break;
+    case Device::TIMVX:
+      out = "Device::TIMVX";
+      break;
     default:
       out = "Device::UNKOWN";
   }
@@ -76,6 +79,9 @@ std::ostream& operator<<(std::ostream& out,const Device& d){
     case Device::RKNPU:
      out << "Device::RKNPU";
      break;
+    case Device::TIMVX:
+      out << "Device::TIMVX";
+      break;
     default:
       out << "Device::UNKOWN";
   }
diff --git a/fastdeploy/core/fd_type.h b/fastdeploy/core/fd_type.h
old mode 100644
new mode 100755
index 131de20d4..ba18d8e36
--- a/fastdeploy/core/fd_type.h
+++ b/fastdeploy/core/fd_type.h
@@ -22,7 +22,7 @@
 
 namespace fastdeploy {
 
-enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU};
+enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU, TIMVX};
 
 FASTDEPLOY_DECL std::string Str(const Device& d);
 
diff --git a/fastdeploy/fastdeploy_model.cc b/fastdeploy/fastdeploy_model.cc
index 828bc4878..ad4a61c06 100755
--- a/fastdeploy/fastdeploy_model.cc
+++ b/fastdeploy/fastdeploy_model.cc
@@ -42,6 +42,7 @@ bool FastDeployModel::InitRuntime() {
   use_ipu = false;
 #endif
   bool use_rknpu = (runtime_option.device == Device::RKNPU);
+  bool use_timvx = (runtime_option.device == Device::TIMVX);
 
   // whether the model is supported by the setted backend
   bool is_supported = false;
@@ -59,7 +60,14 @@ bool FastDeployModel::InitRuntime() {
         break;
       }
     }
-  } else if(use_ipu) {
+  } else if (use_timvx) {
+    for (auto& item : valid_timvx_backends) {
+      if (item == runtime_option.backend) {
+        is_supported = true;
+        break;
+      }
+    }
+  } else if (use_ipu) {
     for (auto& item : valid_ipu_backends) {
       if (item == runtime_option.backend) {
         is_supported = true;
@@ -111,6 +119,8 @@ bool FastDeployModel::InitRuntime() {
 #endif
   } else if (runtime_option.device == Device::RKNPU) {
     return CreateRKNPUBackend();
+  } else if (runtime_option.device == Device::TIMVX) {
+    return CreateTimVXBackend();
   } else if (runtime_option.device == Device::IPU) {
 #ifdef WITH_IPU
     return CreateIpuBackend();
@@ -195,6 +205,29 @@ bool FastDeployModel::CreateRKNPUBackend() {
   return false;
 }
 
+bool FastDeployModel::CreateTimVXBackend() {
+  if (valid_timvx_backends.size() == 0) {
+    FDERROR << "There's no valid timvx backends for model: " << ModelName()
+            << std::endl;
+    return false;
+  }
+
+  for (size_t i = 0; i < valid_timvx_backends.size(); ++i) {
+    if (!IsBackendAvailable(valid_timvx_backends[i])) {
+      continue;
+    }
+    runtime_option.backend = valid_timvx_backends[i];
+    runtime_ = std::unique_ptr<Runtime>(new Runtime());
+    if (!runtime_->Init(runtime_option)) {
+      return false;
+    }
+    runtime_initialized_ = true;
+    return true;
+  }
+  FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
+  return false;
+}
+
 bool FastDeployModel::CreateIpuBackend() {
   if (valid_ipu_backends.size() == 0) {
     FDERROR << "There's no valid ipu backends for model: " << ModelName()
diff --git a/fastdeploy/fastdeploy_model.h b/fastdeploy/fastdeploy_model.h
index 75e67578e..21c4a44a1 100755
--- a/fastdeploy/fastdeploy_model.h
+++ b/fastdeploy/fastdeploy_model.h
@@ -42,6 +42,9 @@ class FASTDEPLOY_DECL FastDeployModel {
   /** Model's valid ipu backends. This member defined all the ipu backends have successfully tested for the model
    */
   std::vector<Backend> valid_ipu_backends = {Backend::PDINFER};
+  /** Model's valid timvx backends. This member defined all the timvx backends have successfully tested for the model
+   */
+  std::vector<Backend> valid_timvx_backends = {};
   /** Model's valid hardware backends. This member defined all the gpu backends have successfully tested for the model
    */
@@ -117,6 +120,7 @@ class FASTDEPLOY_DECL FastDeployModel {
   virtual bool CreateGpuBackend();
   virtual bool CreateIpuBackend();
   virtual bool CreateRKNPUBackend();
+  virtual bool CreateTimVXBackend();
 
   bool initialized = false;
   std::vector<Backend> valid_external_backends_;
diff --git a/fastdeploy/runtime.cc b/fastdeploy/runtime.cc
index 4dd1bac59..73edafc43 100755
--- a/fastdeploy/runtime.cc
+++ b/fastdeploy/runtime.cc
@@ -230,6 +230,12 @@ void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
   device = Device::RKNPU;
 }
 
+void RuntimeOption::UseTimVX() {
+  enable_timvx = true;
+  device = Device::TIMVX;
+  UseLiteBackend();
+}
+
 void RuntimeOption::SetExternalStream(void* external_stream) {
   external_stream_ = external_stream;
 }
@@ -348,6 +354,11 @@ void RuntimeOption::SetLiteOptimizedModelDir(
   lite_optimized_model_dir = optimized_model_dir;
 }
 
+void RuntimeOption::SetLiteSubgraphPartitionPath(
+    const std::string& nnadapter_subgraph_partition_config_path) {
+  lite_nnadapter_subgraph_partition_config_path = nnadapter_subgraph_partition_config_path;
+}
+
 void RuntimeOption::SetTrtInputShape(const std::string& input_name,
                                      const std::vector<int32_t>& min_shape,
                                      const std::vector<int32_t>& opt_shape,
@@ -516,8 +527,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
     FDINFO << "Runtime initialized with Backend::OPENVINO in "
            << Str(option.device) << "." << std::endl;
   } else if (option.backend == Backend::LITE) {
-    FDASSERT(option.device == Device::CPU,
-             "Backend::LITE only supports Device::CPU");
+    FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX,
+             "Backend::LITE only supports Device::CPU/Device::TIMVX.");
     CreateLiteBackend();
     FDINFO << "Runtime initialized with Backend::LITE in "
            << Str(option.device) << "." << std::endl;
@@ -726,6 +737,8 @@ void Runtime::CreateLiteBackend() {
   lite_option.enable_fp16 = option.lite_enable_fp16;
   lite_option.power_mode = static_cast<int>(option.lite_power_mode);
   lite_option.optimized_model_dir = option.lite_optimized_model_dir;
+  lite_option.nnadapter_subgraph_partition_config_path = option.lite_nnadapter_subgraph_partition_config_path;
+  lite_option.enable_timvx = option.enable_timvx;
   FDASSERT(option.model_format == ModelFormat::PADDLE,
            "LiteBackend only support model format of ModelFormat::PADDLE");
   backend_ = utils::make_unique<LiteBackend>();
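The runtime.cc changes above are the whole user-facing surface of this patch: `RuntimeOption::UseTimVX()` switches the device to `Device::TIMVX` and forces the Paddle Lite backend, while `RuntimeOption::SetLiteSubgraphPartitionPath()` forwards an NNAdapter subgraph partition file to Paddle Lite. A minimal configuration sketch, assuming an SDK built with ENABLE_TIMVX=ON is on the include/link path; the partition file name below is a hypothetical example, not a file shipped by this patch:

```cpp
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  // Added by this patch: target the VeriSilicon TIM-VX device through Paddle Lite.
  option.UseTimVX();
  // Optional, also added by this patch: point Paddle Lite's NNAdapter at a custom
  // subgraph partition file. The path is a placeholder for illustration only.
  option.SetLiteSubgraphPartitionPath("./subgraph_partition_config.txt");
  return 0;
}
```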
diff --git a/fastdeploy/runtime.h b/fastdeploy/runtime.h
old mode 100644
new mode 100755
index 8330f412e..dc9732624
--- a/fastdeploy/runtime.h
+++ b/fastdeploy/runtime.h
@@ -99,6 +99,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
       fastdeploy::rknpu2::CoreMask rknpu2_core =
           fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
 
+  /// Use TIM-VX for inference
+  void UseTimVX();
+
   void SetExternalStream(void* external_stream);
 
   /*
@@ -160,6 +163,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
    */
   void SetLiteOptimizedModelDir(const std::string& optimized_model_dir);
 
+  /**
+   * @brief Set nnadapter subgraph partition path for the Paddle Lite backend.
+   */
+  void SetLiteSubgraphPartitionPath(
+      const std::string& nnadapter_subgraph_partition_config_path);
+
   /**
    * @brief enable half precision while use paddle lite backend
    */
@@ -312,6 +321,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
   bool lite_enable_fp16 = false;
   // optimized model dir for CxxConfig
   std::string lite_optimized_model_dir = "";
+  std::string lite_nnadapter_subgraph_partition_config_path = "";
+  bool enable_timvx = false;
 
   // ======Only for Trt Backend=======
   std::map<std::string, std::vector<int32_t>> trt_max_shape;
diff --git a/fastdeploy/vision/classification/ppcls/model.cc b/fastdeploy/vision/classification/ppcls/model.cc
index a9a8182e3..160c9ae85 100755
--- a/fastdeploy/vision/classification/ppcls/model.cc
+++ b/fastdeploy/vision/classification/ppcls/model.cc
@@ -26,6 +26,8 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file,
   valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER,
                         Backend::LITE};
   valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
+  valid_timvx_backends = {Backend::LITE};
+
   runtime_option = custom_option;
   runtime_option.model_format = model_format;
   runtime_option.model_file = model_file;
diff --git a/fastdeploy/vision/tracking/pptracking/model.cc b/fastdeploy/vision/tracking/pptracking/model.cc
old mode 100644
new mode 100755
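Taken together with the PaddleClasModel change above, which registers Backend::LITE in valid_timvx_backends so that CreateTimVXBackend() has a backend to select, application code follows the same flow as the infer.cc demo added earlier in this patch. A condensed, self-contained sketch assuming a quantized PaddleClas export; the model and image paths are placeholders, not files provided by this change:

```cpp
#include <iostream>
#include <string>
#include "fastdeploy/vision.h"

int main() {
  // Placeholder paths: any quantized PaddleClas export with the usual three files.
  const std::string model_dir = "./ResNet50_vd_quant";
  const std::string image_file = "./test.jpeg";

  fastdeploy::RuntimeOption option;
  option.UseTimVX();  // Device::TIMVX; PaddleClasModel then picks Backend::LITE from valid_timvx_backends

  auto model = fastdeploy::vision::classification::PaddleClasModel(
      model_dir + "/inference.pdmodel", model_dir + "/inference.pdiparams",
      model_dir + "/inference_cls.yaml", option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize the model." << std::endl;
    return -1;
  }

  cv::Mat im = cv::imread(image_file);
  fastdeploy::vision::ClassifyResult res;
  if (!model.Predict(&im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return -1;
  }
  std::cout << res.Str() << std::endl;
  return 0;
}
```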