[Backend] Add Rockchip RV1126 deploy support through PaddleLite TIM-VX (#439)

* add rk1126 support

* update lib

* fix compile bugs

* update doc

* fix compile bug

* update doc

* update doc

* update code

* support model bigger than 2G

* update code

* update code

* update code

* update doc

* update code

* fix bug

* update code

* update code

* update code

* update doc

* update info

* code style check

* update code

* update doc

Co-authored-by: Jason <jiangjiajun@baidu.com>
Author: yeliang2258
Committed by: GitHub
Date: 2022-11-10 10:05:47 +08:00
Parent: ca0de42d0b
Commit: fa807340be
21 changed files with 468 additions and 5 deletions


@@ -39,10 +39,11 @@ if(NOT MSVC)
set(CMAKE_CXX_FLAGS "-Wno-format")
endif(NOT MSVC)
if(UNIX AND (NOT APPLE) AND (NOT ANDROID))
if(UNIX AND (NOT APPLE) AND (NOT ANDROID) AND (NOT ENABLE_TIMVX))
include(${PROJECT_SOURCE_DIR}/cmake/patchelf.cmake)
endif()
if(ANDROID)
# To reduce the volume of the library
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g0 -Os -ffunction-sections -fdata-sections")
@@ -69,6 +70,8 @@ option(WITH_LITE_STATIC "Use Paddle Lite static lib for Android." OFF)
option(WITH_LITE_FULL_API "Use Paddle Lite full API lib for Android." ON)
option(WITH_LITE_FP16 "Use Paddle Lite lib with fp16 enabled for Android." OFF)
option(ENABLE_TIMVX "Whether to compile for TIMVX deploy." OFF)
# Please don't enable this flag now, some bugs exist.
# Only support Linux Now
# option(ENABLE_OPENCV_CUDA "Whether to enable opencv with cuda, this will allow process image with GPU." OFF)
@@ -136,6 +139,11 @@ set(HEAD_DIR "${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}")
include_directories(${HEAD_DIR})
include_directories(${CMAKE_CURRENT_BINARY_DIR})
if (ENABLE_TIMVX)
include(${PROJECT_SOURCE_DIR}/cmake/timvx.cmake)
endif()
if(ANDROID OR IOS)
if(ENABLE_ORT_BACKEND)
message(FATAL_ERROR "Not support ONNXRuntime backend for Android/iOS now. Please set ENABLE_ORT_BACKEND=OFF.")
@@ -562,6 +570,7 @@ install(
DESTINATION ${CMAKE_INSTALL_PREFIX}
)
install(
DIRECTORY ${PROJECT_SOURCE_DIR}/examples
DESTINATION ${CMAKE_INSTALL_PREFIX}

cmake/opencv.cmake (7 changes, Normal file → Executable file)

@@ -41,6 +41,8 @@ elseif(IOS)
else()
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
set(OPENCV_FILENAME "opencv-linux-aarch64-3.4.14")
elseif(TARGET_ABI MATCHES "armhf")
set(OPENCV_FILENAME "opencv-armv7hf")
else()
set(OPENCV_FILENAME "opencv-linux-x64-3.4.16")
endif()
@@ -55,6 +57,8 @@ endif()
set(OPENCV_INSTALL_DIR ${THIRD_PARTY_PATH}/install/)
if(ANDROID)
set(OPENCV_URL_PREFIX "https://bj.bcebos.com/fastdeploy/third_libs")
elseif(TARGET_ABI MATCHES "armhf")
set(OPENCV_URL_PREFIX "https://bj.bcebos.com/fastdeploy/test")
else() # TODO: use fastdeploy/third_libs instead.
set(OPENCV_URL_PREFIX "https://bj.bcebos.com/paddle2onnx/libs")
endif()
@@ -181,6 +185,9 @@ else()
file(RENAME ${THIRD_PARTY_PATH}/install/${OPENCV_FILENAME}/ ${THIRD_PARTY_PATH}/install/opencv)
set(OPENCV_FILENAME opencv)
set(OpenCV_DIR ${THIRD_PARTY_PATH}/install/${OPENCV_FILENAME})
if(TARGET_ABI MATCHES "armhf")
set(OpenCV_DIR ${OpenCV_DIR}/lib/cmake/opencv4)
endif()
if (WIN32)
set(OpenCV_DIR ${OpenCV_DIR}/build)
endif()

cmake/paddlelite.cmake (2 changes, Normal file → Executable file)

@@ -66,6 +66,8 @@ elseif(ANDROID)
else() # Linux
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
set(PADDLELITE_URL "${PADDLELITE_URL_PREFIX}/lite-linux-arm64-20220920.tgz")
elseif(TARGET_ABI MATCHES "armhf")
set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/test/lite-linux_armhf_1101.tgz")
else()
message(FATAL_ERROR "Only support Linux aarch64 now, x64 is not supported with backend Paddle Lite.")
endif()

cmake/timvx.cmake (55 changes, new Executable file)

@@ -0,0 +1,55 @@
if (NOT DEFINED TARGET_ABI)
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}" )
set(TARGET_ABI armhf)
set(CMAKE_BUILD_TYPE MinSizeRel)
else()
if(NOT ${ENABLE_LITE_BACKEND})
message(WARNING "While compiling with -DENABLE_TIMVX=ON, will force to set -DENABLE_LITE_BACKEND=ON")
set(ENABLE_LITE_BACKEND ON)
endif()
if(${ENABLE_PADDLE_FRONTEND})
message(WARNING "While compiling with -DENABLE_TIMVX=ON, will force to set -DENABLE_PADDLE_FRONTEND=OFF")
set(ENABLE_PADDLE_FRONTEND OFF)
endif()
if(${ENABLE_ORT_BACKEND})
message(WARNING "While compiling with -DENABLE_TIMVX=ON, will force to set -DENABLE_ORT_BACKEND=OFF")
set(ENABLE_ORT_BACKEND OFF)
endif()
if(${ENABLE_PADDLE_BACKEND})
message(WARNING "While compiling with -DENABLE_TIMVX=ON, will force to set -DENABLE_PADDLE_BACKEND=OFF")
set(ENABLE_PADDLE_BACKEND OFF)
endif()
if(${ENABLE_OPENVINO_BACKEND})
message(WARNING "While compiling with -DENABLE_TIMVX=ON, will force to set -DENABLE_OPENVINO_BACKEND=OFF")
set(ENABLE_OPENVINO_BACKEND OFF)
endif()
if(${ENABLE_TRT_BACKEND})
message(WARNING "While compiling with -DENABLE_TIMVX=ON, will force to set -DENABLE_TRT_BACKEND=OFF")
set(ENABLE_TRT_BACKEND OFF)
endif()
if(${WITH_GPU})
message(WARNING "While compiling with -DENABLE_TIMVX=ON, will force to set -DWITH_GPU=OFF")
set(WITH_GPU OFF)
endif()
if(${ENABLE_OPENCV_CUDA})
message(WARNING "While compiling with -DENABLE_TIMVX=ON, will force to set -DENABLE_OPENCV_CUDA=OFF")
set(ENABLE_OPENCV_CUDA OFF)
endif()
if(${ENABLE_TEXT})
set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE)
message(STATUS "While compiling with -DENABLE_TIMVX=ON, will force to set -DENABLE_TEXT=OFF")
endif()
if (DEFINED CMAKE_INSTALL_PREFIX)
install(FILES ${PROJECT_SOURCE_DIR}/cmake/timvx.cmake DESTINATION ${CMAKE_INSTALL_PREFIX})
endif()
endif()

docs/cn/build_and_install/README.md (1 change, Normal file → Executable file)

@@ -5,6 +5,7 @@
- [Build and install for CPU deployment](cpu.md)
- [Build and install for Jetson deployment](jetson.md)
- [Build and install for Android deployment](android.md)
- [Build and install for Rockchip RK1126 deployment](rk1126.md)
## FastDeploy build options


@@ -0,0 +1,63 @@
# Build and Install for Rockchip RK1126 Deployment
FastDeploy supports deployment and inference on Rockchip SoCs through the Paddle Lite backend.
For more details, please refer to: [Paddle Lite deployment examples](https://paddle-lite.readthedocs.io/zh/develop/demo_guides/verisilicon_timvx.html).
This document describes how to cross-compile the Paddle Lite based C++ FastDeploy library.
The relevant build options are described below:
|Build option|Default|Description|Remark|
|:---|:---|:---|:---|
|ENABLE_LITE_BACKEND|OFF|Must be set to ON when building the RK library| - |
For more build options, please refer to [FastDeploy build options](./README.md).
## Setting up the Cross-Compilation Environment
### Host Requirements
- OS: Ubuntu 16.04
- cmake version >= 3.10.0
### Environment Setup
```bash
# 1. Install basic software
apt update
apt-get install -y --no-install-recommends \
gcc g++ git make wget python unzip
# 2. Install arm gcc toolchains
apt-get install -y --no-install-recommends \
g++-arm-linux-gnueabi gcc-arm-linux-gnueabi \
g++-arm-linux-gnueabihf gcc-arm-linux-gnueabihf \
gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
# 3. Install cmake 3.10 or above
wget -c https://mms-res.cdn.bcebos.com/cmake-3.10.3-Linux-x86_64.tar.gz && \
tar xzf cmake-3.10.3-Linux-x86_64.tar.gz && \
mv cmake-3.10.3-Linux-x86_64 /opt/cmake-3.10 && \
ln -s /opt/cmake-3.10/bin/cmake /usr/bin/cmake && \
ln -s /opt/cmake-3.10/bin/ccmake /usr/bin/ccmake
```
## Cross-Compiling the Paddle Lite Based FastDeploy Library
Once the cross-compilation environment is ready, build with the following commands:
```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build
# CMake configuration with RK toolchain
# ENABLE_VISION=ON builds the vision model deployment module (optional)
cmake -DCMAKE_TOOLCHAIN_FILE=./../cmake/timvx.cmake \
-DENABLE_TIMVX=ON \
-DCMAKE_INSTALL_PREFIX=fastdeploy-tmivx \
-DENABLE_VISION=ON \
-Wno-dev ..
# Build FastDeploy RK1126 C++ SDK
make -j8
make install
```
After the build finishes, a fastdeploy-tmivx directory is generated, containing the Paddle Lite TIM-VX based FastDeploy library.
For deploying PaddleClas classification models on RK1126, please refer to: [PaddleClas C++ deployment example on the RK1126 board](../../../examples/vision/classification/paddleclas/rk1126/README.md)


@@ -0,0 +1,11 @@
# Deploying PaddleClas Quantized Models on RK1126
FastDeploy now supports deploying PaddleClas quantized models to RK1126 through Paddle Lite.
For model quantization and downloading quantized models, please refer to: [Model Quantization](../quantize/README.md)
## Detailed Deployment Documents
Only C++ deployment is supported on RK1126.
- [C++ deployment](cpp)


@@ -0,0 +1,38 @@
PROJECT(infer_demo C CXX)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
# Path to the downloaded and extracted FastDeploy SDK
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# Add FastDeploy header include directories
include_directories(${FASTDEPLOY_INCS})
include_directories(${FastDeploy_INCLUDE_DIRS})
add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
# Link against the FastDeploy libraries
target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
install(TARGETS infer_demo DESTINATION ./)
install(DIRECTORY models DESTINATION ./)
install(DIRECTORY images DESTINATION ./)
# install(DIRECTORY run_with_adb.sh DESTINATION ./)
file(GLOB FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/*)
install(PROGRAMS ${FASTDEPLOY_LIBS} DESTINATION lib)
file(GLOB OPENCV_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/opencv/lib/lib*)
install(PROGRAMS ${OPENCV_LIBS} DESTINATION lib)
file(GLOB PADDLELITE_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddlelite/lib/lib*)
install(PROGRAMS ${PADDLELITE_LIBS} DESTINATION lib)
file(GLOB TIMVX_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/paddlelite/lib/verisilicon_timvx/*)
install(PROGRAMS ${TIMVX_LIBS} DESTINATION lib)
file(GLOB ADB_TOOLS run_with_adb.sh)
install(PROGRAMS ${ADB_TOOLS} DESTINATION ./)


@@ -0,0 +1,53 @@
# PaddleClas C++ Deployment Example on the RK1126 Board
The `infer.cc` provided in this directory helps users quickly deploy PaddleClas quantized models on RK1126 with accelerated inference.
## Deployment Preparation
### FastDeploy Cross-Compilation Environment
- 1. For the software and hardware requirements and how to set up the cross-compilation environment, please refer to: [FastDeploy cross-compilation environment setup](../../../../../../docs/cn/build_and_install/rk1126.md#交叉编译环境搭建)
### Quantized Model Preparation
- 1. Users can directly deploy the quantized models provided by FastDeploy.
- 2. Users can also quantize models themselves with the [one-click auto-compression tool](../../../../../../tools/auto_compression/) provided by FastDeploy and deploy the resulting quantized models. (Note: inference on a quantized classification model still needs the inference_cls.yaml file from the FP32 model folder; a self-quantized model folder does not contain this yaml file, so copy it from the FP32 model folder into the quantized model folder.)
- For more information about quantization, see [Model Quantization](../../quantize/README.md)
## Deploying the Quantized ResNet50_vd Classification Model on RK1126
Follow the steps below to deploy the quantized ResNet50_vd model on RK1126:
1. Cross-compile the FastDeploy library; see: [Cross-compiling FastDeploy](../../../../../../docs/cn/build_and_install/rk1126.md#基于-paddlelite-的-fastdeploy-交叉编译库编译)
2. Copy the compiled library into the current directory with the following command:
```bash
cp -r FastDeploy/build/fastdeploy-tmivx/ FastDeploy/examples/vision/classification/paddleclas/rk1126/cpp/
```
3. Download the model and test image needed for deployment into the current directory:
```bash
mkdir models && mkdir images
wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
tar -xvf ResNet50_vd_infer.tgz
cp -r ResNet50_vd_infer models
wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg
cp -r ILSVRC2012_val_00000010.jpeg images
```
4. Build the deployment example with the following commands:
```bash
mkdir build && cd build
cmake -DCMAKE_TOOLCHAIN_FILE=../fastdeploy-tmivx/timvx.cmake -DFASTDEPLOY_INSTALL_DIR=fastdeploy-tmivx ..
make -j8
make install
# After a successful build, an install folder is generated containing the demo binary and the libraries needed for deployment
```
5. Deploy the ResNet50_vd classification model to the Rockchip RV1126 with adb using the following commands:
```bash
# Enter the install directory
cd FastDeploy/examples/vision/classification/paddleclas/rk1126/cpp/build/install/
# Usage: bash run_with_adb.sh <demo_binary> <model_dir> <image_path> <DEVICE_ID>
bash run_with_adb.sh infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg $DEVICE_ID
```
A successful run produces output like the following:
<img width="640" src="https://user-images.githubusercontent.com/30516196/200767389-26519e50-9e4f-4fe1-8d52-260718f73476.png">
Note in particular that models deployed on RK1126 must be quantized models; for model quantization, please refer to [Model Quantization](../../../../../../docs/cn/quantize.md)


@@ -0,0 +1,61 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "fastdeploy/vision.h"
#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif
void InitAndInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + sep + "inference.pdmodel";
auto params_file = model_dir + sep + "inference.pdiparams";
auto config_file = model_dir + sep + "inference_cls.yaml";
fastdeploy::RuntimeOption option;
option.UseTimVX();
auto model = fastdeploy::vision::classification::PaddleClasModel(
model_file, params_file, config_file, option);
assert(model.Initialized());
auto im = cv::imread(image_file);
fastdeploy::vision::ClassifyResult res;
if (!model.Predict(&im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
std::cout << res.Str() << std::endl;
}
int main(int argc, char* argv[]) {
if (argc < 3) {
std::cout << "Usage: infer_demo path/to/quant_model "
"path/to/image "
"run_option, "
"e.g ./infer_demo ./ResNet50_vd_quant ./test.jpeg"
<< std::endl;
return -1;
}
std::string model_dir = argv[1];
std::string test_image = argv[2];
InitAndInfer(model_dir, test_image);
return 0;
}


@@ -0,0 +1,47 @@
#!/bin/bash
HOST_SPACE=${PWD}
echo ${HOST_SPACE}
WORK_SPACE=/data/local/tmp/test
# The first parameter represents the demo name
DEMO_NAME=image_classification_demo
if [ -n "$1" ]; then
DEMO_NAME=$1
fi
# The second parameter represents the model name
MODEL_NAME=mobilenet_v1_fp32_224
if [ -n "$2" ]; then
MODEL_NAME=$2
fi
# The third parameter indicates the name of the image to be tested
IMAGE_NAME=0001.jpg
if [ -n "$3" ]; then
IMAGE_NAME=$3
fi
# The fourth parameter represents the ID of the device
ADB_DEVICE_NAME=
if [ -n "$4" ]; then
ADB_DEVICE_NAME="-s $4"
fi
# Set the environment variables required during the running process
EXPORT_ENVIRONMENT_VARIABLES="export GLOG_v=5; export VIV_VX_ENABLE_GRAPH_TRANSFORM=-pcq:1; export VIV_VX_SET_PER_CHANNEL_ENTROPY=100; export TIMVX_BATCHNORM_FUSION_MAX_ALLOWED_QUANT_SCALE_DEVIATION=300000; export VSI_NN_LOG_LEVEL=5;"
EXPORT_ENVIRONMENT_VARIABLES="${EXPORT_ENVIRONMENT_VARIABLES}export LD_LIBRARY_PATH=${WORK_SPACE}/lib:\$LD_LIBRARY_PATH;"
# Please install adb, and DON'T run this in the docker.
set -e
adb $ADB_DEVICE_NAME shell "rm -rf $WORK_SPACE"
adb $ADB_DEVICE_NAME shell "mkdir -p $WORK_SPACE"
# Upload the demo, libraries, model and test images to the device
adb $ADB_DEVICE_NAME push ${HOST_SPACE}/lib $WORK_SPACE
adb $ADB_DEVICE_NAME push ${HOST_SPACE}/${DEMO_NAME} $WORK_SPACE
adb $ADB_DEVICE_NAME push models $WORK_SPACE
adb $ADB_DEVICE_NAME push images $WORK_SPACE
# Execute the deployment demo
adb $ADB_DEVICE_NAME shell "cd $WORK_SPACE; ${EXPORT_ENVIRONMENT_VARIABLES} chmod +x ./${DEMO_NAME}; ./${DEMO_NAME} ./models/${MODEL_NAME} ./images/$IMAGE_NAME"

fastdeploy/backends/lite/lite_backend.cc (42 changes, Normal file → Executable file)

@@ -63,6 +63,24 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
FDWARNING << "This device is not supported fp16, will skip fp16 option.";
}
}
if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
std::vector<char> nnadapter_subgraph_partition_config_buffer;
if (ReadFile(option_.nnadapter_subgraph_partition_config_path, &nnadapter_subgraph_partition_config_buffer, false)) {
if (!nnadapter_subgraph_partition_config_buffer.empty()) {
std::string nnadapter_subgraph_partition_config_string(nnadapter_subgraph_partition_config_buffer.data(), nnadapter_subgraph_partition_config_buffer.size());
config_.set_nnadapter_subgraph_partition_config_buffer(nnadapter_subgraph_partition_config_string);
}
}
}
if(option_.enable_timvx){
config_.set_nnadapter_device_names({"verisilicon_timvx"});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
valid_places.push_back(
paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
}
valid_places.push_back(
paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
config_.set_valid_places(valid_places);
@@ -75,6 +93,30 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
}
}
bool LiteBackend::ReadFile(const std::string& filename,
std::vector<char>* contents,
const bool binary) {
FILE *fp = fopen(filename.c_str(), binary ? "rb" : "r");
if (!fp){
FDERROR << "Cannot open file " << filename << "." << std::endl;
return false;
}
fseek(fp, 0, SEEK_END);
size_t size = ftell(fp);
fseek(fp, 0, SEEK_SET);
contents->clear();
contents->resize(size);
size_t offset = 0;
char *ptr = reinterpret_cast<char *>(&(contents->at(0)));
while (offset < size) {
size_t already_read = fread(ptr, 1, size - offset, fp);
offset += already_read;
ptr += already_read;
}
fclose(fp);
return true;
}
bool LiteBackend::InitFromPaddle(const std::string& model_file,
const std::string& params_file,
const LiteBackendOption& option) {

fastdeploy/backends/lite/lite_backend.h (5 changes, Normal file → Executable file)

@@ -43,6 +43,8 @@ struct LiteBackendOption {
std::string optimized_model_dir = "";
// TODO(qiuyanjun): support more options for lite backend.
// Such as fp16, different device target (kARM/kXPU/kNPU/...)
std::string nnadapter_subgraph_partition_config_path = "";
bool enable_timvx = false;
};
// Convert data type from paddle lite to fastdeploy
@@ -77,5 +79,8 @@ class LiteBackend : public BaseBackend {
std::map<std::string, int> inputs_order_;
LiteBackendOption option_;
bool supported_fp16_ = false;
bool ReadFile(const std::string& filename,
std::vector<char>* contents,
const bool binary = true);
};
} // namespace fastdeploy

fastdeploy/core/fd_type.cc (6 changes, Normal file → Executable file)

@@ -59,6 +59,9 @@ std::string Str(const Device& d) {
case Device::IPU:
out = "Device::IPU";
break;
case Device::TIMVX:
out = "Device::TIMVX";
break;
default:
out = "Device::UNKOWN";
}
@@ -76,6 +79,9 @@ std::ostream& operator<<(std::ostream& out,const Device& d){
case Device::RKNPU:
out << "Device::RKNPU";
break;
case Device::TIMVX:
out << "Device::TIMVX";
break;
default:
out << "Device::UNKOWN";
}

fastdeploy/core/fd_type.h (2 changes, Normal file → Executable file)

@@ -22,7 +22,7 @@
namespace fastdeploy {
enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU};
enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU, TIMVX};
FASTDEPLOY_DECL std::string Str(const Device& d);


@@ -42,6 +42,7 @@ bool FastDeployModel::InitRuntime() {
use_ipu = false;
#endif
bool use_rknpu = (runtime_option.device == Device::RKNPU);
bool use_timvx = (runtime_option.device == Device::TIMVX);
// whether the model is supported by the setted backend
bool is_supported = false;
@@ -59,6 +60,13 @@ bool FastDeployModel::InitRuntime() {
break;
}
}
} else if (use_timvx) {
for (auto& item : valid_timvx_backends) {
if (item == runtime_option.backend) {
is_supported = true;
break;
}
}
}else if(use_ipu) {
for (auto& item : valid_ipu_backends) {
if (item == runtime_option.backend) {
@@ -111,6 +119,8 @@ bool FastDeployModel::InitRuntime() {
#endif
} else if (runtime_option.device == Device::RKNPU) {
return CreateRKNPUBackend();
} else if (runtime_option.device == Device::TIMVX) {
return CreateTimVXBackend();
} else if (runtime_option.device == Device::IPU) {
#ifdef WITH_IPU
return CreateIpuBackend();
@@ -195,6 +205,29 @@ bool FastDeployModel::CreateRKNPUBackend() {
return false;
}
bool FastDeployModel::CreateTimVXBackend() {
if (valid_timvx_backends.size() == 0) {
FDERROR << "There's no valid timvx backends for model: " << ModelName()
<< std::endl;
return false;
}
for (size_t i = 0; i < valid_timvx_backends.size(); ++i) {
if (!IsBackendAvailable(valid_timvx_backends[i])) {
continue;
}
runtime_option.backend = valid_timvx_backends[i];
runtime_ = std::unique_ptr<Runtime>(new Runtime());
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
return false;
}
bool FastDeployModel::CreateIpuBackend() {
if (valid_ipu_backends.size() == 0) {
FDERROR << "There's no valid ipu backends for model: " << ModelName()


@@ -42,6 +42,9 @@ class FASTDEPLOY_DECL FastDeployModel {
/** Model's valid ipu backends. This member defined all the ipu backends have successfully tested for the model
*/
std::vector<Backend> valid_ipu_backends = {Backend::PDINFER};
/** Model's valid timvx backends. This member defined all the timvx backends have successfully tested for the model
*/
std::vector<Backend> valid_timvx_backends = {};
/** Model's valid hardware backends. This member defined all the gpu backends have successfully tested for the model
@@ -117,6 +120,7 @@ class FASTDEPLOY_DECL FastDeployModel {
virtual bool CreateGpuBackend();
virtual bool CreateIpuBackend();
virtual bool CreateRKNPUBackend();
virtual bool CreateTimVXBackend();
bool initialized = false;
std::vector<Backend> valid_external_backends_;
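
The `valid_timvx_backends` member is the per-model opt-in that `CreateTimVXBackend()` walks when `runtime_option.device` is `Device::TIMVX`. A minimal sketch of how a model class would enable it, mirroring the `PaddleClasModel` change further down in this diff; the class name `DemoModel` and the header path are illustrative assumptions, not part of this commit:

```cpp
// Sketch only: assumed header path for FastDeployModel.
#include "fastdeploy/fastdeploy_model.h"

namespace fastdeploy {

// Illustrative model class, not part of this commit.
class DemoModel : public FastDeployModel {
 public:
  DemoModel() {
    valid_cpu_backends = {Backend::LITE};
    // Only backends actually validated on TIM-VX should be listed here;
    // CreateTimVXBackend() iterates this vector and uses the first one
    // that is available in the current build.
    valid_timvx_backends = {Backend::LITE};
  }
};

}  // namespace fastdeploy
```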


@@ -230,6 +230,12 @@ void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
device = Device::RKNPU;
}
void RuntimeOption::UseTimVX() {
enable_timvx = true;
device = Device::TIMVX;
UseLiteBackend();
}
void RuntimeOption::SetExternalStream(void* external_stream) {
external_stream_ = external_stream;
}
@@ -348,6 +354,11 @@ void RuntimeOption::SetLiteOptimizedModelDir(
lite_optimized_model_dir = optimized_model_dir;
}
void RuntimeOption::SetLiteSubgraphPartitionPath(
const std::string& nnadapter_subgraph_partition_config_path) {
lite_nnadapter_subgraph_partition_config_path = nnadapter_subgraph_partition_config_path;
}
void RuntimeOption::SetTrtInputShape(const std::string& input_name,
const std::vector<int32_t>& min_shape,
const std::vector<int32_t>& opt_shape,
@@ -516,8 +527,8 @@ bool Runtime::Init(const RuntimeOption& _option) {
FDINFO << "Runtime initialized with Backend::OPENVINO in "
<< Str(option.device) << "." << std::endl;
} else if (option.backend == Backend::LITE) {
FDASSERT(option.device == Device::CPU,
"Backend::LITE only supports Device::CPU");
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX,
"Backend::LITE only supports Device::CPU/Device::TIMVX.");
CreateLiteBackend();
FDINFO << "Runtime initialized with Backend::LITE in " << Str(option.device)
<< "." << std::endl;
@@ -726,6 +737,8 @@ void Runtime::CreateLiteBackend() {
lite_option.enable_fp16 = option.lite_enable_fp16;
lite_option.power_mode = static_cast<int>(option.lite_power_mode);
lite_option.optimized_model_dir = option.lite_optimized_model_dir;
lite_option.nnadapter_subgraph_partition_config_path = option.lite_nnadapter_subgraph_partition_config_path;
lite_option.enable_timvx = option.enable_timvx;
FDASSERT(option.model_format == ModelFormat::PADDLE,
"LiteBackend only support model format of ModelFormat::PADDLE");
backend_ = utils::make_unique<LiteBackend>();

fastdeploy/runtime.h (11 changes, Normal file → Executable file)

@@ -99,6 +99,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
fastdeploy::rknpu2::CoreMask rknpu2_core
= fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
/// Use TIM-VX for inference
void UseTimVX();
void SetExternalStream(void* external_stream);
/*
@@ -160,6 +163,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
*/
void SetLiteOptimizedModelDir(const std::string& optimized_model_dir);
/**
* @brief Set nnadapter subgraph partition path for Paddle Lite backend.
*/
void SetLiteSubgraphPartitionPath(
const std::string& nnadapter_subgraph_partition_config_path);
/**
* @brief enable half precision while use paddle lite backend
*/
@@ -312,6 +321,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
bool lite_enable_fp16 = false;
// optimized model dir for CxxConfig
std::string lite_optimized_model_dir = "";
std::string lite_nnadapter_subgraph_partition_config_path = "";
bool enable_timvx = false;
// ======Only for Trt Backend=======
std::map<std::string, std::vector<int32_t>> trt_max_shape;
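
Putting the new runtime options together with the PaddleClas change below, application code would select TIM-VX roughly as follows. This is a minimal sketch reusing the model and image from the example earlier in this diff; the subgraph partition config file name is an illustrative assumption, and the `SetLiteSubgraphPartitionPath` call is optional:

```cpp
#include <iostream>
#include "fastdeploy/vision.h"

int main() {
  fastdeploy::RuntimeOption option;
  // Selects Device::TIMVX and switches the runtime to the Paddle Lite backend.
  option.UseTimVX();
  // Optional: pass an NNAdapter subgraph partition config to Paddle Lite
  // (the file name here is purely illustrative).
  option.SetLiteSubgraphPartitionPath("./subgraph_partition_config.txt");

  auto model = fastdeploy::vision::classification::PaddleClasModel(
      "ResNet50_vd_infer/inference.pdmodel",
      "ResNet50_vd_infer/inference.pdiparams",
      "ResNet50_vd_infer/inference_cls.yaml", option);
  if (!model.Initialized()) {
    std::cerr << "Failed to initialize model." << std::endl;
    return -1;
  }

  cv::Mat im = cv::imread("ILSVRC2012_val_00000010.jpeg");
  fastdeploy::vision::ClassifyResult res;
  if (!model.Predict(&im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return -1;
  }
  std::cout << res.Str() << std::endl;
  return 0;
}
```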


@@ -26,6 +26,8 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file,
valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER,
Backend::LITE};
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
valid_timvx_backends = {Backend::LITE};
runtime_option = custom_option;
runtime_option.model_format = model_format;
runtime_option.model_file = model_file;

fastdeploy/vision/tracking/pptracking/model.cc (0 changes, Normal file → Executable file)