【Hackathon_4th 180】Support HORIZON BPU Backend for FastDeploy (#1822)

* add horizon backend and PPYOLOE examples

* Update the coding style of horizon header files

* Update the coding style of horizon header files

* Update the coding style of horizon header files

* Add download and automatic installation of horizon packages

* Add UseHorizonNPUBackend Method

* Remove redundant header files left after building the FD SDK and update some coding style

* Update horizon.md

* Update horizon.md

---------

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
Author: seyosum
Date: 2023-05-06 16:10:37 +08:00
Committed by: GitHub
Parent: 9164796645
Commit: df8dd3e3ac
29 changed files with 1693 additions and 7 deletions


@@ -63,6 +63,7 @@ option(ENABLE_OPENVINO_BACKEND "Whether to enable openvino backend." OFF)
option(ENABLE_RKNPU2_BACKEND "Whether to enable RKNPU2 backend." OFF)
option(ENABLE_SOPHGO_BACKEND "Whether to enable SOPHON backend." OFF)
option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
option(ENABLE_HORIZON_BACKEND "Whether to enable HORIZON backend." OFF)
option(ENABLE_VISION "Whether to enable vision models usage." OFF)
option(ENABLE_TEXT "Whether to enable text models usage." OFF)
option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF)
@@ -165,6 +166,7 @@ file(GLOB_RECURSE DEPLOY_POROS_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fast
file(GLOB_RECURSE DEPLOY_TRT_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/*.cc ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/tensorrt/*.cpp)
file(GLOB_RECURSE DEPLOY_OPENVINO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/openvino/*.cc)
file(GLOB_RECURSE DEPLOY_RKNPU2_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/rknpu2/*.cc)
file(GLOB_RECURSE DEPLOY_HORIZON_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/horizon/*.cc)
file(GLOB_RECURSE DEPLOY_SOPHGO_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/sophgo/*.cc)
file(GLOB_RECURSE DEPLOY_LITE_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/runtime/backends/lite/*.cc)
file(GLOB_RECURSE DEPLOY_ENCRYPTION_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/encryption/*.cc)
@@ -185,7 +187,7 @@ list(REMOVE_ITEM ALL_DEPLOY_SRCS ${DEPLOY_ORT_SRCS} ${DEPLOY_PADDLE_SRCS}
${DEPLOY_OPENVINO_SRCS} ${DEPLOY_LITE_SRCS}
${DEPLOY_VISION_SRCS} ${DEPLOY_TEXT_SRCS}
${DEPLOY_PIPELINE_SRCS} ${DEPLOY_RKNPU2_SRCS}
${DEPLOY_SOPHGO_SRCS} ${DEPLOY_ENCRYPTION_SRCS} ${DEPLOY_HORIZON_SRCS})
set(DEPEND_LIBS "")
@@ -253,6 +255,13 @@ if(ENABLE_RKNPU2_BACKEND)
list(APPEND DEPEND_LIBS ${RKNN_RT_LIB})
endif()
if(ENABLE_HORIZON_BACKEND)
add_definitions(-DENABLE_HORIZON_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_HORIZON_SRCS})
include(${PROJECT_SOURCE_DIR}/cmake/horizon.cmake)
list(APPEND DEPEND_LIBS ${BPU_libs})
endif()
if(ENABLE_SOPHGO_BACKEND)
add_definitions(-DENABLE_SOPHGO_BACKEND)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_SOPHGO_SRCS})


@@ -23,6 +23,7 @@ set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
# Inference backend and FastDeploy Moudle
set(ENABLE_ORT_BACKEND @ENABLE_ORT_BACKEND@)
set(ENABLE_RKNPU2_BACKEND @ENABLE_RKNPU2_BACKEND@)
set(ENABLE_HORIZON_BACKEND @ENABLE_HORIZON_BACKEND@)
set(ENABLE_SOPHGO_BACKEND @ENABLE_SOPHGO_BACKEND@)
set(ENABLE_LITE_BACKEND @ENABLE_LITE_BACKEND@)
set(ENABLE_PADDLE_BACKEND @ENABLE_PADDLE_BACKEND@)
@@ -167,6 +168,21 @@ if(ENABLE_RKNPU2_BACKEND)
list(APPEND FASTDEPLOY_LIBS ${RKNPU2_LIB})
endif()
if(ENABLE_HORIZON_BACKEND)
set(DNN_PATH ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/dnn)
set(APPSDK_PATH ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/appsdk/appuser/)
set(DNN_LIB_PATH ${DNN_PATH}/lib)
set(APPSDK_LIB_PATH ${APPSDK_PATH}/lib/hbbpu)
set(BPU_libs dnn cnn_intf hbrt_bernoulli_aarch64)
link_directories(${DNN_LIB_PATH}
${APPSDK_PATH}/lib/hbbpu
${APPSDK_PATH}/lib)
list(APPEND FASTDEPLOY_LIBS ${BPU_libs})
endif()
if(ENABLE_LITE_BACKEND)
set(LITE_DIR ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/${PADDLELITE_FILENAME})
if(ANDROID)

cmake/horizon.cmake (new file)

@@ -0,0 +1,24 @@
# get Horizon_URL
set(HORIZON_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
set(HORIZON_VERSION "2.5.2")
set(HORIZON_FILE "horizon_runtime-xj3-aarch64-${HORIZON_VERSION}.tgz")
set(HORIZON_URL "${HORIZON_URL_BASE}${HORIZON_FILE}")
# download_and_decompress
download_and_decompress(${HORIZON_URL} ${CMAKE_CURRENT_BINARY_DIR}/${HORIZON_FILE} ${THIRD_PARTY_PATH}/install)
# set path
set(HORIZON_RUNTIME_PATH ${THIRD_PARTY_PATH}/install/)
set(DNN_PATH ${HORIZON_RUNTIME_PATH}/dnn/)
set(APPSDK_PATH ${HORIZON_RUNTIME_PATH}/appsdk/appuser/)
set(DNN_LIB_PATH ${DNN_PATH}/lib)
set(APPSDK_LIB_PATH ${APPSDK_PATH}/lib/hbbpu)
set(BPU_libs dnn cnn_intf hbrt_bernoulli_aarch64)
include_directories(${DNN_PATH}/include
${APPSDK_PATH}/include)
link_directories(${DNN_LIB_PATH}
${APPSDK_PATH}/lib/hbbpu
${APPSDK_PATH}/lib)


@@ -33,6 +33,7 @@ function(fastdeploy_summary)
message(STATUS " FastDeploy version : ${FASTDEPLOY_VERSION}")
message(STATUS " ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
message(STATUS " ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}")
message(STATUS " ENABLE_HORIZON_BACKEND : ${ENABLE_HORIZON_BACKEND}")
message(STATUS " ENABLE_SOPHGO_BACKEND : ${ENABLE_SOPHGO_BACKEND}")
message(STATUS " ENABLE_PADDLE_BACKEND : ${ENABLE_PADDLE_BACKEND}")
message(STATUS " ENABLE_LITE_BACKEND : ${ENABLE_LITE_BACKEND}")


@@ -0,0 +1,76 @@
[English](../../en/build_and_install/horizon.md) | 简体中文
# FastDeploy Horizon Navigation Guide
Horizon refers to the BPU of the Sunrise X3 series chips launched by Horizon Robotics.
FastDeploy currently provides preliminary support for deploying models with Horizon.
If you run into problems during use, please file an issue and include your runtime environment.
## Overview of the FastDeploy Horizon Environment Setup
To use the Horizon inference engine in FastDeploy, you need to set up the following environments.
| Tool | Required | Platform | Purpose |
|--------------|------|-------|---------------------------------|
| Paddle2ONNX | Yes | PC | Converts Paddle Inference models to ONNX models |
| Horizon XJ3 toolchain Docker image | Yes | PC | Converts ONNX models to Horizon models |
| Horizon XJ3 OpenExplorer | Yes | PC | Header files and dynamic libraries required for Horizon model conversion |
## Install the Model Conversion Environment
Horizon provides a complete model conversion environment (the XJ3 chip toolchain image). FastDeploy uses image version [2.5.2](ftp://vrftp.horizon.ai/Open_Explorer_gcc_9.3.0/2.5.2/docker_openexplorer_ubuntu_20_xj3_gpu_v2.5.2_py38.tar.gz), which you can obtain from the Horizon developer platform.
## Install the Required Software Packages
Horizon also provides a complete toolkit (Horizon XJ3 OpenExplorer). FastDeploy uses development package version [2.5.2](ftp://vrftp.horizon.ai/Open_Explorer_gcc_9.3.0/2.5.2/horizon_xj3_openexplorer_v2.5.2_py38_20230331.tar.gz), which you can obtain from the Horizon developer platform.
Since the CPU on the board is relatively weak, cross compilation on a PC is recommended. The following tutorial is carried out in the Docker environment provided by Horizon.
### Start the Docker Environment
After downloading the Horizon XJ3 chip toolchain image to your local machine, run the following command to load the image into Docker:
```bash
docker load < docker_openexplorer_ubuntu_20_xj3_gpu_v2.5.2_py38.tar.gz
```
After downloading the dependent software package to your local machine, extract it:
```bash
tar -xvf horizon_xj3_openexplorer_v2.5.2_py38_20230331.tar.gz
```
解压完成之后cd至改目录
```bash
cd horizon_xj3_open_explorer_v2.5.2-py38_20230331/
```
根目录下有运行docker的脚本运行以下命令
```bash
sh run_docker.sh /home gpu
```
The first argument is the directory to mount into the container, and the second argument enables GPU acceleration for this Docker container.
At this point, the required environment is ready.
## Install the FastDeploy C++ SDK
Download the cross-compilation toolchain [gcc_linaro_6.5.0_2018.12_x86_64_aarch64_linux_gnu](https://bj.bcebos.com/fastdeploy/third_libs/gcc_linaro_6.5.0_2018.12_x86_64_aarch64_linux_gnu.tar.xz); it is recommended to extract it into the `/opt` directory.
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
# 如果您使用的是develop分支输入以下命令
git checkout develop
mkdir build && cd build
cmake .. -DCMAKE_C_COMPILER=/opt/gcc_linaro_6.5.0_2018.12_x86_64_aarch64_linux_gnu/gcc-linaro-6.5.0-2018.12-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-gcc \
-DCMAKE_CXX_COMPILER=/opt/gcc_linaro_6.5.0_2018.12_x86_64_aarch64_linux_gnu/gcc-linaro-6.5.0-2018.12-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-g++ \
-DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
-DTARGET_ABI=arm64 \
-DWITH_TIMVX=ON \
-DENABLE_HORIZON_BACKEND=ON \
-DENABLE_VISION=ON \
-DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.0 \
-Wno-dev ..
make -j16
make install
```


@@ -0,0 +1,179 @@
[English](../../../en/faq/horizon/export.md) | 中文
# Export Model Guide
## Introduction
The Horizon model conversion and quantization tools are packaged in the Docker image provided by Horizon. Before converting a model, please set up the environment according to the [Horizon Navigation Guide](../../build_and_install/horizon.md).
## Conversion Workflow
Since Horizon does not support converting Paddle models directly into Horizon models, the Paddle model must first be converted to an ONNX model. Horizon currently supports mainly opset 10 and opset 11, and requires ir_version <= 7, so the conversion needs special care; see [the official documentation provided by Horizon](https://developer.horizon.ai/api/v1/fileData/doc/ddk_doc/navigation/ai_toolchain/docs_cn/horizon_ai_toolchain_user_guide/model_conversion.html#fp-model-preparation) for details.
To convert a Paddle model to an ONNX model, run the following command:
```bash
## Convert to ONNX
paddle2onnx --model_dir model/ \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file model.onnx \
--enable_dev_version True \
--opset_version 11
```
To change the ir_version, refer to the following code:
```python
import onnx
model = onnx.load("model.onnx")
model.ir_version = 7
onnx.save(model, "model.onnx")
```
After the ONNX model is ready, you can start converting it to a Horizon model by following the official documentation. A MobileNetv2 conversion example is given here.
Inside the Docker instance, cd into the following directory:
```bash
cd ddk/samples/ai_toolchain/horizon_model_convert_sample/03_classification/04_mobilenet_onnx/mapper/
```
This directory contains `01_check.sh`, `02_preprocess.sh`, and `03_build.sh`; these three scripts are all you need for the model conversion stage. The notes below explain how to use them.
`01_check.sh` checks the model and the runtime environment; you only need to change `caffe_model` to the path of your own ONNX model.
```bash
set -ex
cd $(dirname $0) || exit
model_type="onnx"
caffe_model="../../../01_common/model_zoo/mapper/classification/mobilenet_onnx/mobilenetv2.onnx"
march="bernoulli2"
hb_mapper checker --model-type ${model_type} \
--model ${caffe_model} \
--march ${march}
```
`02_preprocess.sh` prepares the data in the format required for quantization; FastDeploy uses the following configuration.
```bash
python3 ../../../data_preprocess.py \
--src_dir ../../../01_common/calibration_data/imagenet \
--dst_dir ./calibration_data_rgb \
--pic_ext .rgb \
--read_mode opencv \
--saved_data_type uint8
```
`03_build.sh` converts the ONNX model into a model that can run on Horizon. The conversion requires configuring parameters such as the input data format.
FastDeploy configures the model path as follows:
```yaml
model_parameters:
# the model file of floating-point ONNX neural network data
onnx_model: '../../../01_common/model_zoo/mapper/classification/mobilenet_onnx/mobilenetv2.onnx'
# the applicable BPU architecture
march: "bernoulli2"
# specifies whether or not to dump the intermediate results of all layers in conversion
# if set to True, then the intermediate results of all layers shall be dumped
layer_out_dump: False
# the directory in which model conversion results are stored
working_dir: 'model_output_rgb'
# model conversion generated name prefix of those model files used for dev board execution
output_model_file_prefix: 'mobilenetv2_224x224_rgb'
```
The model input format is configured as follows:
```yaml
input_parameters:
# (Optional) node name of model input,
# it shall be the same as the name of model file, otherwise an error will be reported,
# the node name of model file will be used when left blank
input_name: ""
# the data formats to be passed into neural network when actually performing neural network
# available options: nv12/rgb/bgr/yuv444/gray/featuremap,
input_type_rt: 'rgb'
# the data layout formats to be passed into neural network when actually performing neural network, available options: NHWC/NCHW
# If input_type_rt is configured as nv12, then this parameter does not need to be configured
input_layout_rt: 'NHWC'
# the data formats in network training
# available options: rgb/bgr/gray/featuremap/yuv444
input_type_train: 'rgb'
# the data layout in network training, available options: NHWC/NCHW
input_layout_train: 'NCHW'
# (Optional)the input size of model network, seperated by 'x'
# note that the network input size of model file will be used if left blank
# otherwise it will overwrite the input size of model file
input_shape: ''
# the data batch_size to be passed into neural network when actually performing neural network, default value: 1
#input_batch: 1
# preprocessing methods of network input, available options:
# 'no_preprocess' indicates that no preprocess will be made
# 'data_mean' indicates that to minus the channel mean, i.e. mean_value
# 'data_scale' indicates that image pixels to multiply data_scale ratio
# 'data_mean_and_scale' indicates that to multiply scale ratio after channel mean is minused
norm_type: 'data_mean_and_scale'
# the mean value minused by image
# note that values must be seperated by space if channel mean value is used
mean_value: 123.675 116.28 103.53
# scale value of image preprocess
# note that values must be seperated by space if channel scale value is used
scale_value: 0.01712 0.0175 0.01743
```
The model quantization parameters are configured as follows:
```yaml
calibration_parameters:
# the directory where reference images of model quantization are stored
# image formats include JPEG, BMP etc.
# should be classic application scenarios, usually 20~100 images are picked out from test datasets
# in addition, note that input images should cover typical scenarios
# and try to avoid those overexposed, oversaturated, vague,
# pure blank or pure white images
# use ';' to seperate when there are multiple input nodes
cal_data_dir: './calibration_data_rgb'
# calibration data binary file save type, available options: float32, uint8
# cal_data_type: 'float32'
# In case the size of input image file is different from that of in model training
# and that preprocess_on is set to True,
# shall the default preprocess method(skimage resize) be used
# i.e., to resize or crop input image into specified size
# otherwise user must keep image size as that of in training in advance
# preprocess_on: False
# The algorithm type of model quantization, support default, mix, kl, max, load, usually use default can meet the requirements.
# If it does not meet the expectation, you can try to change it to mix first. If there is still no expectation, try kl or max again.
# When using QAT to export the model, this parameter should be set to load.
# For more details of the parameters, please refer to the parameter details in PTQ Principle And Steps section of the user manual.
calibration_type: 'max'
# this is the parameter of the 'max' calibration method and it is used for adjusting the intercept point of the 'max' calibration.
# this parameter will only become valid when the calibration_type is specified as 'max'.
# RANGE: 0.0 - 1.0. Typical options includes: 0.99999/0.99995/0.99990/0.99950/0.99900.
max_percentile: 0.9999
```
Leave the remaining parameters at their default values and run `03_build.sh`:
```bash
config_file="./mobilenetv2_config.yaml"
model_type="onnx"
# build model
hb_mapper makertbin --config ${config_file} \
--model-type ${model_type}
```
At this point, the converted model file (with the .bin suffix) is generated in `model_output_rgb` under the same path.


@@ -0,0 +1,82 @@
English | [简体中文](../../cn/build_and_install/horizon.md)
# How to Build Horizon Deployment Environment
Horizon refers to the BPU of the Sunrise X3 series chips launched by Horizon Robotics. FastDeploy currently provides preliminary support for deploying models with Horizon. If you encounter any problems during use, please open an issue and include your operating environment.
## Introduction
If you want to use the Horizon inference engine in FastDeploy, you need to configure the following environments:
| Tools | Required | Platform | Description |
|:------------------|:---------------------|:-------|---------------------------------|
| Paddle2ONNX | Yes | PC | Used to convert PaddleInference models to ONNX models |
| Horizon XJ3 toolchain Docker image | Yes | PC | Used to convert ONNX models to Horizon models |
| Horizon XJ3 OpenExplorer | Yes | PC | Header files and dynamic libraries required for model conversion |
## Model Conversion Environment
Horizon provides a complete model conversion environment (the XJ3 chip toolchain image). FastDeploy uses image version
[2.5.2](ftp://vrftp.horizon.ai/Open_Explorer_gcc_9.3.0/2.5.2/docker_openexplorer_ubuntu_20_xj3_gpu_v2.5.2_py38.tar.gz), which you can obtain through the Horizon developer platform.
## Software Package
Horizon also provides a complete toolkit (Horizon XJ3 OpenExplorer).
The development package version used by FastDeploy is
[2.5.2](ftp://vrftp.horizon.ai/Open_Explorer_gcc_9.3.0/2.5.2/horizon_xj3_openexplorer_v2.5.2_py38_20230331.tar.gz), which you can obtain through the Horizon developer platform.
Due to the weak performance of the board CPU, it is recommended to perform cross compilation on a PC. The following tutorial is completed in the Docker environment provided by Horizon.
### Start Docker Environment
After downloading the Horizon XJ3 chip toolchain image locally, execute the following command to import the image package into the Docker environment:
```bash
docker load < docker_openexplorer_ubuntu_20_xj3_gpu_v2.5.2_py38.tar.gz
```
After downloading the dependent software packages to the local machine, extract them:
```bash
tar -xvf horizon_xj3_openexplorer_v2.5.2_py38_20230331.tar.gz
```
After the extraction is complete, cd to that directory:
```bash
cd horizon_xj3_open_explorer_v2.5.2-py38_20230331/
```
Under the root directory, there is a script to run Docker. Run the following command:
```bash
sh run_docker.sh /home gpu
```
The first directory is the directory to be mounted on the container, and the latter parameter is to enable GPU acceleration for the Docker.
At this point, the preparation of the required environment for compilation is complete.
## How to Build and Install C++ SDK
Download the cross-compilation tool, [gcc_linaro_6.5.0_2018.12_x86_64_aarch64_linux_gnu](https://bj.bcebos.com/fastdeploy/third_libs/gcc_linaro_6.5.0_2018.12_x86_64_aarch64_linux_gnu.tar.xz), and it is recommended to extract it to the `/opt` directory.
```bash
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
git checkout develop
mkdir build && cd build
cmake .. -DCMAKE_C_COMPILER=/opt/gcc_linaro_6.5.0_2018.12_x86_64_aarch64_linux_gnu/gcc-linaro-6.5.0-2018.12-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-gcc \
-DCMAKE_CXX_COMPILER=/opt/gcc_linaro_6.5.0_2018.12_x86_64_aarch64_linux_gnu/gcc-linaro-6.5.0-2018.12-x86_64_aarch64-linux-gnu/bin/aarch64-linux-gnu-g++ \
-DCMAKE_TOOLCHAIN_FILE=./../cmake/toolchain.cmake \
-DTARGET_ABI=arm64 \
-DWITH_TIMVX=ON \
-DENABLE_HORIZON_BACKEND=ON \
-DENABLE_VISION=ON \
-DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.0 \
-Wno-dev ..
make -j16
make install
```


@@ -0,0 +1,183 @@
English | [中文](../../../cn/faq/horizon/export.md)
# Export Model
## Introduction
The Horizon model conversion and quantization tools are both encapsulated in the provided Docker image. Before performing model conversion, please ensure that the environment has been installed successfully according to [How to Build Horizon Deployment Environment](../../build_and_install/horizon.md).
## Model Conversion
Since Horizon models cannot be converted directly from Paddle models, the first step is to convert the Paddle model to an ONNX model. The main opset versions currently supported by Horizon are opset 10 and opset 11, with ir_version <= 7, so the conversion process requires special attention; refer to [the official documentation provided by Horizon](https://developer.horizon.ai/api/v1/fileData/doc/ddk_doc/navigation/ai_toolchain/docs_cn/horizon_ai_toolchain_user_guide/model_conversion.html#fp-model-preparation) for more details.
To convert a Paddle model to an ONNX model, you can run the following command:
```bash
paddle2onnx --model_dir model/ \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file model.onnx \
--enable_dev_version True \
--opset_version 11
```
To change the ir_version, you can refer to the following code snippet:
```python
import onnx
model = onnx.load("model.onnx")
model.ir_version = 7
onnx.save(model, "model.onnx")
```
Once you have converted the model to ONNX format, you can begin the process of converting it to a Horizon model. The official documentation provides detailed instructions on how to do this. Here is an example of converting a MobileNetv2 model.
Assuming you have already entered the Docker instance, you can navigate to the following directory by running the command:
```bash
cd ddk/samples/ai_toolchain/horizon_model_convert_sample/03_classification/04_mobilenet_onnx/mapper/
```
The directory contains three scripts that are mainly used for model conversion: `01_check.sh`, `02_preprocess.sh`, and `03_build.sh`. Below are some important points to keep in mind when using these scripts:
The `01_check.sh` script checks the model and the environment before starting the conversion process. You only need to modify the `caffe_model` variable to point to the path of your ONNX model.
```bash
set -ex
cd $(dirname $0) || exit
model_type="onnx"
caffe_model="../../../01_common/model_zoo/mapper/classification/mobilenet_onnx/mobilenetv2.onnx"
march="bernoulli2"
hb_mapper checker --model-type ${model_type} \
--model ${caffe_model} \
--march ${march}
```
`02_preprocess.sh` prepares the data required for quantization. FastDeploy uses the following configuration.
```bash
python3 ../../../data_preprocess.py \
--src_dir ../../../01_common/calibration_data/imagenet \
--dst_dir ./calibration_data_rgb \
--pic_ext .rgb \
--read_mode opencv \
--saved_data_type uint8
```
To convert the ONNX model to a Horizon runnable model, you can use the `03_build.sh` script. This script requires several input parameters to configure the conversion process, including the input data format, batch size, and input and output node names.
For configuring the model path in FastDeploy, you need to specify the following parameters:
```yaml
model_parameters:
# the model file of floating-point ONNX neural network data
onnx_model: '../../../01_common/model_zoo/mapper/classification/mobilenet_onnx/mobilenetv2.onnx'
# the applicable BPU architecture
march: "bernoulli2"
# specifies whether or not to dump the intermediate results of all layers in conversion
# if set to True, then the intermediate results of all layers shall be dumped
layer_out_dump: False
# the directory in which model conversion results are stored
working_dir: 'model_output_rgb'
# model conversion generated name prefix of those model files used for dev board execution
output_model_file_prefix: 'mobilenetv2_224x224_rgb'
```
The configuration for the input format of the model is as follows:
```yaml
input_parameters:
# (Optional) node name of model input,
# it shall be the same as the name of model file, otherwise an error will be reported,
# the node name of model file will be used when left blank
input_name: ""
# the data formats to be passed into neural network when actually performing neural network
# available options: nv12/rgb/bgr/yuv444/gray/featuremap,
input_type_rt: 'rgb'
# the data layout formats to be passed into neural network when actually performing neural network, available options: NHWC/NCHW
# If input_type_rt is configured as nv12, then this parameter does not need to be configured
input_layout_rt: 'NHWC'
# the data formats in network training
# available options: rgb/bgr/gray/featuremap/yuv444
input_type_train: 'rgb'
# the data layout in network training, available options: NHWC/NCHW
input_layout_train: 'NCHW'
# (Optional)the input size of model network, seperated by 'x'
# note that the network input size of model file will be used if left blank
# otherwise it will overwrite the input size of model file
input_shape: ''
# the data batch_size to be passed into neural network when actually performing neural network, default value: 1
#input_batch: 1
# preprocessing methods of network input, available options:
# 'no_preprocess' indicates that no preprocess will be made
# 'data_mean' indicates that to minus the channel mean, i.e. mean_value
# 'data_scale' indicates that image pixels to multiply data_scale ratio
# 'data_mean_and_scale' indicates that to multiply scale ratio after channel mean is minused
norm_type: 'data_mean_and_scale'
# the mean value minused by image
# note that values must be seperated by space if channel mean value is used
mean_value: 123.675 116.28 103.53
# scale value of image preprocess
# note that values must be seperated by space if channel scale value is used
scale_value: 0.01712 0.0175 0.01743
```
The configuration for the quantization parameters of the model is as follows:
```yaml
calibration_parameters:
# the directory where reference images of model quantization are stored
# image formats include JPEG, BMP etc.
# should be classic application scenarios, usually 20~100 images are picked out from test datasets
# in addition, note that input images should cover typical scenarios
# and try to avoid those overexposed, oversaturated, vague,
# pure blank or pure white images
# use ';' to seperate when there are multiple input nodes
cal_data_dir: './calibration_data_rgb'
# calibration data binary file save type, available options: float32, uint8
# cal_data_type: 'float32'
# In case the size of input image file is different from that of in model training
# and that preprocess_on is set to True,
# shall the default preprocess method(skimage resize) be used
# i.e., to resize or crop input image into specified size
# otherwise user must keep image size as that of in training in advance
# preprocess_on: False
# The algorithm type of model quantization, support default, mix, kl, max, load, usually use default can meet the requirements.
# If it does not meet the expectation, you can try to change it to mix first. If there is still no expectation, try kl or max again.
# When using QAT to export the model, this parameter should be set to load.
# For more details of the parameters, please refer to the parameter details in PTQ Principle And Steps section of the user manual.
calibration_type: 'max'
# this is the parameter of the 'max' calibration method and it is used for adjusting the intercept point of the 'max' calibration.
# this parameter will only become valid when the calibration_type is specified as 'max'.
# RANGE: 0.0 - 1.0. Typical options includes: 0.99999/0.99995/0.99990/0.99950/0.99900.
max_percentile: 0.9999
```
Leave the remaining parameters at their default values and run `03_build.sh`:
```bash
config_file="./mobilenetv2_config.yaml"
model_type="onnx"
# build model
hb_mapper makertbin --config ${config_file} \
--model-type ${model_type}
```
By now, the converted model file (with the suffix .bin) will be generated in `model_output_rgb` in the same directory.


@@ -0,0 +1,14 @@
# PaddleClas Image Classification Model Deployment on Horizon NPU with FastDeploy
## 1. Introduction
This example, based on the Horizon X3 Pi, shows how to deploy PaddleClas quantized models with FastDeploy. Deployment on the following chips is supported:
- Horizon Sunrise X3M BPU
## 2. Using Pre-exported Models
FastDeploy provides pre-quantized models for deployment. You are also welcome to use the [FastDeploy one-click auto-compression tool](https://github.com/PaddlePaddle/FastDeploy/tree/develop/tools/common_tools/auto_compression) to quantize models yourself and then deploy them.
## 3. Model Conversion
For model conversion, refer to the [model conversion guide](../../../../../docs/cn/faq/horizon/export.md).
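For a quick end-to-end check, the sketch below shows one possible way to build and run the C++ classification demo in this example on the board. It is only an illustration: the `cpp` sub-directory name, the SDK path `fastdeploy-0.0.0`, and the model directory layout are assumptions; the model directory is expected to contain the converted `resnet_50_224x224_rgb.bin` and `inference_cls.yaml` used by the demo (see `infer.cc`).
```bash
# Minimal sketch, not an official script; adjust all paths to your environment.
cd cpp && mkdir build && cd build
# FASTDEPLOY_INSTALL_DIR points to the SDK produced by `make install` in the build guide
# (built with -DENABLE_HORIZON_BACKEND=ON).
cmake .. -DFASTDEPLOY_INSTALL_DIR=/path/to/fastdeploy-0.0.0
make -j4
# Run on the board: the model directory holds resnet_50_224x224_rgb.bin and inference_cls.yaml.
./infer_demo ./ppclas_model_dir ./images/ILSVRC2012_val_00000010.jpeg
```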


@@ -0,0 +1,15 @@
CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
project(infer_demo)
set(CMAKE_CXX_STANDARD 14)
# Specify the path to the downloaded and extracted FastDeploy SDK
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake)
include_directories(${FastDeploy_INCLUDE_DIRS})
add_executable(infer_demo infer.cc)
target_link_libraries(infer_demo
${FastDeploy_LIBS}
)


@@ -0,0 +1,60 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
void HorizonInfer(const std::string &model_dir, const std::string &image_file) {
auto model_file = model_dir + "/resnet_50_224x224_rgb.bin";
auto params_file = "";
auto config_file = model_dir + "/inference_cls.yaml";
auto option = fastdeploy::RuntimeOption();
option.UseHorizon();
option.UseHorizonNPUBackend();
auto format = fastdeploy::ModelFormat::HORIZON;
auto model = fastdeploy::vision::classification::PaddleClasModel(
model_file, params_file, config_file, option, format);
if (!model.Initialized()) {
std::cerr << "model Failed to initialize." << std::endl;
return;
}
model.GetPreprocessor().DisablePermute();
model.GetPreprocessor().DisableNormalize();
auto im = cv::imread(image_file);
fastdeploy::vision::ClassifyResult res;
fastdeploy::TimeCounter tc;
tc.Start();
if (!model.Predict(im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
tc.End();
tc.PrintInfo("PPClas in Horizon");
}
int main(int argc, char *argv[]) {
if (argc < 3) {
std::cout << "Usage: HorizonInfer path/to/model_dir path/to/image, "
"e.g ./infer_demo ./ppclas_model_dir "
"./images/ILSVRC2012_val_00000010.jpeg"
<< std::endl;
return -1;
}
HorizonInfer(argv[1], argv[2]);
return 0;
}


@@ -0,0 +1,197 @@
English | [简体中文](./README_CN.md)
# PaddleDetection Horizon Deployment Example
## Supported Models
The following PaddleDetection models have been tested on Horizon:
- PPYOLOE(float32)
## Preparing and Converting PaddleDetection Deployment Models
Before deploying PaddleDetection models on Horizon, you need to convert Paddle models to Horizon models. The specific steps are as follows:
* Convert the Paddle dynamic graph model to an ONNX model. Please refer to [Export Model in PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/EXPORT_MODEL.md) and set **export.nms=True** during the export.
* Convert the ONNX model to the Horizon model. Please refer to the [conversion document](../../../../../docs/en/faq/horizon/export.md) for conversion.
## Model Conversion Example
### Notes
When deploying PPDetection models on Horizon, the following points should be noted:
* The model export needs to include Decode.
* Since Horizon does not support NMS, the output nodes must be trimmed before NMS.
* Due to the limitations of the Div operator on Horizon, the output nodes of the model need to be trimmed before the Div operator.
### Converting Paddle models to ONNX models
Since the model conversion tool provided by Horizon currently does not support exporting Paddle models directly as Horizon models, it is necessary to first export the Paddle model as an ONNX model, and then convert the ONNX model to a Horizon model.
```bash
# Download the Paddle static graph model and extract it.
wget https://bj.bcebos.com/fastdeploy/models/ppyoloe_plus_crn_m_80e_coco.tgz
tar xvf ppyoloe_plus_crn_m_80e_coco.tgz
# Convert the static graph to ONNX format. Note that the save_file argument should match the name of the compressed model file.
paddle2onnx --model_dir ppyoloe_plus_crn_m_80e_coco \
--model_filename model.pdmodel \
--params_filename model.pdiparams \
--save_file ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx \
--enable_dev_version True \
--opset_version 11
# Fix the input shape
python -m paddle2onnx.optimize --input_model ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx \
--output_model ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx \
--input_shape_dict "{'image':[1,3,640,640], 'scale_factor':[1,2]}"
```
Since the exported ONNX IR Version may not match Horizon, the ONNX IR Version needs to be manually changed. You can refer to the following Python code:
```python
import onnx
model = onnx.load("ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx")
model.ir_version = 7
onnx.save(model, "ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx")
```
### Model pruning
Due to differences between Paddle2ONNX versions, the output node names of the converted model may vary. To find the correct output node names, use [Netron](https://netron.app) to visualize the model and locate the NonMaxSuppression node marked in blue below; the node names in the red boxes are the target names.
For example, after visualizing the model with Netron, you may get the following image:
![](ppyoloe-onnx.png)
After finding the NonMaxSuppression node, you can see that the names of the two nodes marked in red are "p2o.Mul.290" and "p2o.Concat.29". Therefore, you need to cut the output up to these two nodes. You can refer to the following Python code to crop the output:
```python
import argparse
import sys
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument(
'--model',
required=True,
help='Path of directory saved the input model.')
parser.add_argument(
'--output_names',
required=True,
nargs='+',
help='The outputs of pruned model.')
parser.add_argument(
'--save_file', required=True, help='Path to save the new onnx model.')
return parser.parse_args()
if __name__ == '__main__':
args = parse_arguments()
import onnx
model = onnx.load(args.model)
output_tensor_names = set()
for node in model.graph.node:
for out in node.output:
output_tensor_names.add(out)
for output_name in args.output_names:
if output_name not in output_tensor_names:
print(
"[ERROR] Cannot find output tensor name '{}' in onnx model graph.".
format(output_name))
sys.exit(-1)
if len(set(args.output_names)) < len(args.output_names):
print(
"[ERROR] There's dumplicate name in --output_names, which is not allowed."
)
sys.exit(-1)
output_node_indices = set()
output_to_node = dict()
for i, node in enumerate(model.graph.node):
for out in node.output:
output_to_node[out] = i
if out in args.output_names:
output_node_indices.add(i)
# from outputs find all the ancestors
import copy
reserved_node_indices = copy.deepcopy(output_node_indices)
reserved_inputs = set()
new_output_node_indices = copy.deepcopy(output_node_indices)
while True and len(new_output_node_indices) > 0:
output_node_indices = copy.deepcopy(new_output_node_indices)
new_output_node_indices = set()
for out_node_idx in output_node_indices:
for ipt in model.graph.node[out_node_idx].input:
if ipt in output_to_node:
reserved_node_indices.add(output_to_node[ipt])
new_output_node_indices.add(output_to_node[ipt])
else:
reserved_inputs.add(ipt)
num_inputs = len(model.graph.input)
num_outputs = len(model.graph.output)
num_nodes = len(model.graph.node)
print(len(reserved_node_indices), "xxxx")
for idx in range(num_nodes - 1, -1, -1):
if idx not in reserved_node_indices:
del model.graph.node[idx]
for idx in range(num_inputs - 1, -1, -1):
if model.graph.input[idx].name not in reserved_inputs:
del model.graph.input[idx]
for out in args.output_names:
model.graph.output.extend([onnx.ValueInfoProto(name=out)])
for i in range(num_outputs):
del model.graph.output[0]
from onnx_infer_shape import SymbolicShapeInference
model = SymbolicShapeInference.infer_shapes(model, 2**31 - 1, True, False,
1)
onnx.checker.check_model(model)
onnx.save(model, args.save_file)
print("[Finished] The new model saved in {}.".format(args.save_file))
print("[DEBUG INFO] The inputs of new model: {}".format(
[x.name for x in model.graph.input]))
print("[DEBUG INFO] The outputs of new model: {}".format(
[x.name for x in model.graph.output]))
```
If you save the script above as `prune_onnx_model.py`, you can run the following command to prune the model:
```bash
python prune_onnx_model.py --model ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx \
--output_names p2o.Mul.290 p2o.Concat.29 \
--save_file ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco_cut.onnx
```
Make sure to replace the model paths and the output node names above with the actual values for your own model.
The Paddle2ONNX part is now complete. For the process of converting the ONNX model to a Horizon model, you can refer to the [Export Model Guide](../../../../../docs/en/faq/horizon/export.md).
### Configure Conversion of yaml Files
**Modifying the normalize parameters**
If you need to perform the normalize operation on the NPU, configure the normalize parameters according to your model, for example:
```yaml
norm_type: 'data_scale'
# the mean value minused by image
# note that values must be seperated by space if channel mean value is used
mean_value: ''
# scale value of image preprocess
# note that values must be seperated by space if channel scale value is used
scale_value: 0.003921568627451
```
At this point, the model conversion is completed and can be deployed directly in FastDeploy.
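For reference, the sketch below shows one possible way to build and run the C++ demo under [./cpp](./cpp) once the Horizon model is ready. It is only an illustration: the SDK path and the model directory name are assumptions; the model directory should contain the converted `.bin` model and the `infer_cfg.yml` exported together with the Paddle model (see `infer_ppyoloe_demo.cc`).
```bash
# Minimal sketch, not an official script; adjust all paths to your environment.
cd cpp && mkdir build && cd build
# FASTDEPLOY_INSTALL_DIR points to the cross-compiled FastDeploy SDK
# built with -DENABLE_HORIZON_BACKEND=ON.
cmake .. -DFASTDEPLOY_INSTALL_DIR=/path/to/fastdeploy-0.0.0
make -j4
# Run on the board with a model directory and a test image.
./infer_ppyoloe_demo ./ppyoloe_model_dir ./test.jpeg
```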
## Other Links
- [Cpp](./cpp)
- [Visual model prediction results](../../../../../docs/api/vision_results/)


@@ -0,0 +1,195 @@
[English](README.md) | 简体中文
# PaddleDetection Horizon Deployment Example
## Supported Models
The following PaddleDetection models have been tested on Horizon:
- PPYOLOE(float32)
## Preparing and Converting PaddleDetection Deployment Models
Before deploying on Horizon, the Paddle model needs to be converted into a Horizon model. The steps are as follows:
* Convert the Paddle dynamic graph model to an ONNX model. Please refer to [Export Model in PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/EXPORT_MODEL.md) and set **export.nms=True** during the export.
* For converting the ONNX model to a Horizon model, please refer to the [conversion guide](../../../../../docs/cn/faq/horizon/export.md).
## Model Conversion Example
### Notes
Keep the following points in mind when deploying PaddleDetection models on Horizon:
* The exported model must include the Decode step.
* Since Horizon does not support NMS, the output nodes must be trimmed to before the NMS operator.
* Due to limitations of the Div operator on Horizon, the model output nodes need to be trimmed to before the Div operator.
### Converting the Paddle Model to an ONNX Model
Since the model conversion tool provided by Horizon does not yet support exporting Paddle models directly as Horizon models, you first need to export the Paddle model as an ONNX model and then convert the ONNX model to a Horizon model.
```bash
# Take PP-YOLOE+m as an example
# Download the Paddle static graph model and extract it
wget https://bj.bcebos.com/fastdeploy/models/ppyoloe_plus_crn_m_80e_coco.tgz
tar xvf ppyoloe_plus_crn_m_80e_coco.tgz
# 静态图转ONNX模型注意这里的save_file请和压缩包名对齐
paddle2onnx --model_dir ppyoloe_plus_crn_m_80e_coco \
--model_filename model.pdmodel \
--params_filename model.pdiparams \
--save_file ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx \
--enable_dev_version True \
--opset_version 11
# Fix the input shape
python -m paddle2onnx.optimize --input_model ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx \
--output_model ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx \
--input_shape_dict "{'image':[1,3,640,640], 'scale_factor':[1,2]}"
```
Since the IR version of the exported ONNX model does not match what Horizon expects, the ONNX ir_version has to be changed manually; you can refer to the following Python code:
```python
import onnx
model = onnx.load("ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx")
model.ir_version = 7
onnx.save(model, "ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx")
```
### Model Pruning
Because the output node names of the converted model vary with the Paddle2ONNX version, please use [Netron](https://netron.app) to visualize the model and find the NonMaxSuppression node marked by the blue box below; the node names in the red boxes are the target names.
For example, visualizing the model with Netron gives the following image:
![](ppyoloe-onnx.png)
After locating the NonMaxSuppression node, you can see that the two nodes marked with red boxes are named p2o.Mul.290 and p2o.Concat.29, so the outputs need to be cut off at these two nodes.
You can refer to the following Python code to prune the outputs:
```python
import argparse
import sys
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument(
'--model',
required=True,
help='Path of directory saved the input model.')
parser.add_argument(
'--output_names',
required=True,
nargs='+',
help='The outputs of pruned model.')
parser.add_argument(
'--save_file', required=True, help='Path to save the new onnx model.')
return parser.parse_args()
if __name__ == '__main__':
args = parse_arguments()
import onnx
model = onnx.load(args.model)
output_tensor_names = set()
for node in model.graph.node:
for out in node.output:
output_tensor_names.add(out)
for output_name in args.output_names:
if output_name not in output_tensor_names:
print(
"[ERROR] Cannot find output tensor name '{}' in onnx model graph.".
format(output_name))
sys.exit(-1)
if len(set(args.output_names)) < len(args.output_names):
print(
"[ERROR] There's dumplicate name in --output_names, which is not allowed."
)
sys.exit(-1)
output_node_indices = set()
output_to_node = dict()
for i, node in enumerate(model.graph.node):
for out in node.output:
output_to_node[out] = i
if out in args.output_names:
output_node_indices.add(i)
# from outputs find all the ancestors
import copy
reserved_node_indices = copy.deepcopy(output_node_indices)
reserved_inputs = set()
new_output_node_indices = copy.deepcopy(output_node_indices)
while True and len(new_output_node_indices) > 0:
output_node_indices = copy.deepcopy(new_output_node_indices)
new_output_node_indices = set()
for out_node_idx in output_node_indices:
for ipt in model.graph.node[out_node_idx].input:
if ipt in output_to_node:
reserved_node_indices.add(output_to_node[ipt])
new_output_node_indices.add(output_to_node[ipt])
else:
reserved_inputs.add(ipt)
num_inputs = len(model.graph.input)
num_outputs = len(model.graph.output)
num_nodes = len(model.graph.node)
print(len(reserved_node_indices), "xxxx")
for idx in range(num_nodes - 1, -1, -1):
if idx not in reserved_node_indices:
del model.graph.node[idx]
for idx in range(num_inputs - 1, -1, -1):
if model.graph.input[idx].name not in reserved_inputs:
del model.graph.input[idx]
for out in args.output_names:
model.graph.output.extend([onnx.ValueInfoProto(name=out)])
for i in range(num_outputs):
del model.graph.output[0]
from onnx_infer_shape import SymbolicShapeInference
model = SymbolicShapeInference.infer_shapes(model, 2**31 - 1, True, False,
1)
onnx.checker.check_model(model)
onnx.save(model, args.save_file)
print("[Finished] The new model saved in {}.".format(args.save_file))
print("[DEBUG INFO] The inputs of new model: {}".format(
[x.name for x in model.graph.input]))
print("[DEBUG INFO] The outputs of new model: {}".format(
[x.name for x in model.graph.output]))
```
If you save the script above as `prune_onnx_model.py`, run the following command to prune the model:
```bash
python prune_onnx_model.py --model ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco.onnx \
--output_names p2o.Mul.290 p2o.Concat.29 \
--save_file ppyoloe_plus_crn_m_80e_coco/ppyoloe_plus_crn_m_80e_coco_cut.onnx
```
至此paddle2onnx部分完成onnx模型转horizon模型的流程可参考[导出模型指南](../../../../../docs/cn/faq/horizon/export.md)。
### 配置转换yaml文件
**修改normalize参数**
如果你需要在NPU上执行normalize操作请根据你的模型配置normalize参数例如:
```yaml
norm_type: 'data_scale'
# the mean value minused by image
# note that values must be seperated by space if channel mean value is used
mean_value: ''
# scale value of image preprocess
# note that values must be seperated by space if channel scale value is used
scale_value: 0.003921568627451
```
至此模型转换完成可直接在FastDeploy中进行部署。
## 其他链接
- [Cpp部署](./cpp)
- [视觉模型预测结果](../../../../../docs/api/vision_results/)


@@ -0,0 +1,13 @@
PROJECT(infer_demo)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
# Specify the path to the downloaded and extracted FastDeploy SDK
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# Add the FastDeploy header directories
include_directories(${FASTDEPLOY_INCS})
add_executable(infer_ppyoloe_demo ${PROJECT_SOURCE_DIR}/infer_ppyoloe_demo.cc)
target_link_libraries(infer_ppyoloe_demo ${FASTDEPLOY_LIBS})


@@ -0,0 +1,62 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/vision.h"
void HorizonInfer(const std::string& model_dir, const std::string& image_file) {
auto model_file = model_dir + "/ppyoloe_640x640_rgb_s.bin";
auto params_file = "";
auto config_file = model_dir + "/infer_cfg.yml";
auto option = fastdeploy::RuntimeOption();
option.UseHorizon();
option.UseHorizonNPUBackend();
auto format = fastdeploy::ModelFormat::HORIZON;
auto model = fastdeploy::vision::detection::PPYOLOE(
model_file, params_file, config_file, option, format);
model.GetPreprocessor().DisablePermute();
model.GetPreprocessor().DisableNormalize();
model.GetPostprocessor().ApplyNMS();
fastdeploy::vision::DetectionResult res;
auto im = cv::imread(image_file);
fastdeploy::TimeCounter tc;
tc.Start();
if (!model.Predict(&im, &res)) {
std::cerr << "Failed to predict." << std::endl;
return;
}
tc.End();
tc.PrintInfo("PPDet in Horizon");
auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
cv::imwrite("infer_horizon.jpg", vis_im);
std::cout << "Visualized result saved in ./infer_horizon.jpg" << std::endl;
}
int main(int argc, char* argv[]) {
if (argc < 3) {
std::cout << "Usage: infer_ppyoloe_demo path/to/model_dir path/to/image, "
"e.g ./infer_ppyoloe_demo ./ppyoloe_model_dir ./test.jpeg"
<< std::endl;
return -1;
}
HorizonInfer(argv[1], argv[2]);
return 0;
}

Binary image file added (not shown), size 1020 KiB.


@@ -64,3 +64,7 @@
#ifndef ENABLE_BENCHMARK
#cmakedefine ENABLE_BENCHMARK
#endif
#ifndef ENABLE_HORIZON_BACKEND
#cmakedefine ENABLE_HORIZON_BACKEND
#endif


@@ -77,6 +77,7 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
bool use_gpu = (runtime_option.device == Device::GPU);
bool use_ipu = (runtime_option.device == Device::IPU);
bool use_rknpu = (runtime_option.device == Device::RKNPU);
bool use_horizon = (runtime_option.device == Device::SUNRISENPU);
bool use_sophgotpu = (runtime_option.device == Device::SOPHGOTPUD);
bool use_timvx = (runtime_option.device == Device::TIMVX);
bool use_ascend = (runtime_option.device == Device::ASCEND);
@@ -97,6 +98,13 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
<< " is not supported." << std::endl; << " is not supported." << std::endl;
return false; return false;
} }
} else if (use_horizon) {
if (!IsSupported(valid_horizon_backends, runtime_option.backend)) {
FDERROR << "The valid horizon backends of model " << ModelName()
<< " are " << Str(valid_horizon_backends) << ", "
<< runtime_option.backend << " is not supported." << std::endl;
return false;
}
} else if (use_sophgotpu) {
if (!IsSupported(valid_sophgonpu_backends, runtime_option.backend)) {
FDERROR << "The valid sophgo backends of model " << ModelName() << " are "
@@ -169,6 +177,8 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
#endif
} else if (runtime_option.device == Device::RKNPU) {
return CreateRKNPUBackend();
} else if (runtime_option.device == Device::SUNRISENPU) {
return CreateHorizonBackend();
} else if (runtime_option.device == Device::TIMVX) {
return CreateTimVXBackend();
} else if (runtime_option.device == Device::ASCEND) {
@@ -188,9 +198,9 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
return false;
#endif
}
FDERROR << "Only support "
"CPU/GPU/IPU/RKNPU/HORIZONNPU/TIMVX/KunlunXin/ASCEND/DirectML now."
<< std::endl;
return false;
}
@@ -278,6 +288,28 @@ bool FastDeployModel::CreateRKNPUBackend() {
return false;
}
bool FastDeployModel::CreateHorizonBackend() {
if (valid_horizon_backends.empty()) {
FDERROR << "There's no valid npu backends for model: " << ModelName()
<< std::endl;
return false;
}
for (size_t i = 0; i < valid_horizon_backends.size(); ++i) {
if (!IsBackendAvailable(valid_horizon_backends[i])) {
continue;
}
runtime_option.backend = valid_horizon_backends[i];
runtime_ = std::unique_ptr<Runtime>(new Runtime());
if (!runtime_->Init(runtime_option)) {
return false;
}
runtime_initialized_ = true;
return true;
}
FDERROR << "Cannot find an available npu backend to load this model."
<< std::endl;
return false;
}
bool FastDeployModel::CreateSophgoNPUBackend() {
if (valid_sophgonpu_backends.empty()) {
FDERROR << "There's no valid npu backends for model: " << ModelName()


@@ -59,6 +59,7 @@ class FASTDEPLOY_DECL FastDeployModel {
std::vector<Backend> valid_rknpu_backends = {};
/** Model's valid hardware backends. This member defined all the sophgo npu backends have successfully tested for the model
*/
std::vector<Backend> valid_horizon_backends = {};
std::vector<Backend> valid_sophgonpu_backends = {};
/// Get number of inputs for this model
@@ -156,6 +157,7 @@ class FASTDEPLOY_DECL FastDeployModel {
bool CreateGpuBackend();
bool CreateIpuBackend();
bool CreateRKNPUBackend();
bool CreateHorizonBackend();
bool CreateSophgoNPUBackend();
bool CreateTimVXBackend();
bool CreateKunlunXinBackend();


@@ -0,0 +1,399 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/runtime/backends/horizon/horizon_backend.h"
namespace fastdeploy{
HorizonBackend::~HorizonBackend() {
  int ret = -1;
  // Release memory uniformly here
  if (input_properties_ != nullptr) {
    free(input_properties_);
  }
  if (output_properties_ != nullptr) {
    free(output_properties_);
  }
  if (input_mems_ == nullptr) {
    return;
  }
  for (int i = 0; i < NumInputs(); i++) {
    ret = hbSysFreeMem(&(input_mems_[i].sysMem[0]));
    if (ret != 0) {
      FDERROR << "release input mem fail! ret=" << ret << std::endl;
    }
  }
  // Free the tensor arrays only after every per-tensor buffer has been released.
  free(input_mems_);
  if (output_mems_ != nullptr) {
    for (int i = 0; i < NumOutputs(); i++) {
      ret = hbSysFreeMem(&(output_mems_[i].sysMem[0]));
      if (ret != 0) {
        FDERROR << "release output mem fail! ret=" << ret << std::endl;
      }
    }
    free(output_mems_);
  }
  ret = hbDNNRelease(packed_dnn_handle_);
  if (ret != 0) {
    FDERROR << "hbDNNRelease fail! ret=" << ret << std::endl;
  }
}
bool HorizonBackend::GetModelInputOutputInfos(){
const char **model_name_list;
int model_count = 0;
int ret;
// get model name
ret = hbDNNGetModelNameList(&model_name_list, &model_count, packed_dnn_handle_);
if(ret != 0){
FDERROR << "get model name fail! ret=" << ret << std::endl;
return false;
}
// get dnn handle
ret = hbDNNGetModelHandle(&dnn_handle_, packed_dnn_handle_, model_name_list[0]);
if(ret != 0){
FDERROR << "get dnn handle fail! ret=" << ret << std::endl;
return false;
}
// get input infos
// Get detailed input parameters
int input_count = 0;
ret = hbDNNGetInputCount(&input_count, dnn_handle_);
if(ret != 0){
FDERROR << "get input count fail! ret=" << ret << std::endl;
return false;
}
input_properties_ = (hbDNNTensorProperties*)malloc(sizeof(hbDNNTensorProperties) * input_count);
memset(input_properties_, 0, input_count * sizeof(hbDNNTensorProperties));
inputs_desc_.resize(input_count);
// get input info and copy to input tensor info
for (uint32_t i = 0; i < input_count; i++) {
ret = hbDNNGetInputTensorProperties(&input_properties_[i], dnn_handle_, i);
if(ret != 0){
FDERROR << "get input tensor properties fail! ret=" << ret << std::endl;
return false;
}
if ((input_properties_[i].tensorLayout != HB_DNN_LAYOUT_NHWC)) {
FDERROR << "horizon_backend only support input layout is NHWC"
<< std::endl;
}
if(input_properties_[i].tensorType!= HB_DNN_IMG_TYPE_RGB){
FDERROR << "horizon_backend only support input format is RGB"
<< std::endl;
}
const char *name;
ret = hbDNNGetInputName(&name, dnn_handle_, i);
if(ret != 0){
FDERROR << "get input tensor name fail! ret=" << ret << std::endl;
return false;
}
// copy input proper to input tensor info
std::string temp_name = name;
std::vector<int> temp_shape{};
int n_dims = input_properties_[i].validShape.numDimensions;
temp_shape.resize(n_dims);
for (int j = 0; j < n_dims; j++) {
temp_shape[j] = (int)input_properties_[i].validShape.dimensionSize[j];
}
// Only support RGB format, so input type is UINT8
FDDataType temp_dtype = FDDataType::UINT8;
TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
inputs_desc_[i] = temp_input_info;
}
// get output infos
// Get detailed output parameters
int output_count = 0;
ret = hbDNNGetOutputCount(&output_count, dnn_handle_);
if(ret != 0){
FDERROR << "get output count fail! ret=" << ret << std::endl;
return false;
}
output_properties_ = (hbDNNTensorProperties*)malloc(sizeof(hbDNNTensorProperties) * output_count);
memset(output_properties_, 0, output_count * sizeof(hbDNNTensorProperties));
outputs_desc_.resize(output_count);
for (uint32_t i = 0; i < output_count; i++){
// get model output size
ret = hbDNNGetOutputTensorProperties(&output_properties_[i], dnn_handle_, i);
const char *name;
ret = hbDNNGetOutputName(&name, dnn_handle_, i);
if(ret != 0){
FDERROR << "get output tensor name fail! ret=" << ret << std::endl;
return false;
}
// copy output proper to output tensor info
std::string temp_name = name;
std::vector<int> temp_shape{};
int n_dims = output_properties_[i].validShape.numDimensions;
if ((n_dims == 4) && (output_properties_[i].validShape.dimensionSize[3] == 1)) {
n_dims--;
}
temp_shape.resize(n_dims);
for (int j = 0; j < n_dims; j++) {
temp_shape[j] = (int)output_properties_[i].validShape.dimensionSize[j];
}
FDDataType temp_dtype = HorizonTensorTypeToFDDataType(output_properties_[i].tensorType);
TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
outputs_desc_[i] = temp_input_info;
}
return true;
}
TensorInfo HorizonBackend::GetInputInfo(int index){
FDASSERT(index < NumInputs(),
"The index: %d should less than the number of inputs: %d.", index,
NumInputs());
return inputs_desc_[index];
}
std::vector<TensorInfo> HorizonBackend::GetInputInfos(){
return inputs_desc_;
}
TensorInfo HorizonBackend::GetOutputInfo(int index){
FDASSERT(index < NumOutputs(),
"The index: %d should less than the number of outputs %d.", index,
NumOutputs());
return outputs_desc_[index];
}
std::vector<TensorInfo> HorizonBackend::GetOutputInfos(){
return outputs_desc_;
}
bool HorizonBackend::LoadModel(const char *model){
int ret = -1;
ret = hbDNNInitializeFromFiles(&packed_dnn_handle_, &model, 1);
if (ret != 0) {
FDERROR << "horizon_init fail! ret=" << ret << std::endl;
return false;
}
return true;
}
bool HorizonBackend::Init(const RuntimeOption& runtime_option){
// Init model from file
if (!LoadModel(runtime_option.model_file.c_str())) {
FDERROR << "load model failed" << std::endl;
return false;
}
// GetModelInputOutputInfos
if (!GetModelInputOutputInfos()) {
FDERROR << "get model input output infos failed" << std::endl;
return false;
}
return true;
}
bool HorizonBackend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs, bool copy_to_fd){
// Judge whether the input and output size are the same
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[HorizonBackend] Size of the inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
int ret = -1;
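// Allocate BPU cached memory for inputs and outputs lazily on the first Infer() call, using the aligned sizes reported by the DNN library.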
if (!infer_init_) {
// Create input tensor memory
int input_count = NumInputs();
int output_count = NumOutputs();
input_mems_ = (hbDNNTensor*)malloc(sizeof(hbDNNTensor) * input_count);
output_mems_ = (hbDNNTensor*)malloc(sizeof(hbDNNTensor) * output_count);
for (int i = 0; i < input_count; i++) {
input_mems_[i].properties = input_properties_[i];
input_mems_[i].properties.alignedShape = input_mems_[i].properties.validShape;
auto &mem = input_mems_[i].sysMem[0];
int input_mem_size = input_properties_[i].alignedByteSize;
ret = hbSysAllocCachedMem(&mem, input_mem_size);
if (ret != 0) {
FDERROR << "hbSysAllocCachedMem fails." << std::endl;
return false;
}
}
for (int i = 0; i < output_count; i++) {
output_mems_[i].properties = output_properties_[i];
auto &mem = output_mems_[i].sysMem[0];
int output_mem_size = output_properties_[i].alignedByteSize;
ret = hbSysAllocCachedMem(&mem, output_mem_size);
if (ret != 0) {
FDERROR << "hbSysAllocCachedMem fails." << std::endl;
return false;
}
}
infer_init_ = true;
}
// Copy input data to input tensor memory
for (int i = 0; i < NumInputs(); i++) {
if (inputs[i].Data() == nullptr) {
FDERROR << "inputs[" << i << "].Data() is NULL." << std::endl;
return false;
}
auto &mem = input_mems_[i].sysMem[0];
memcpy(mem.virAddr, inputs[i].Data(), inputs[i].Nbytes());
ret = hbSysFlushMem(&mem, HB_SYS_MEM_CACHE_CLEAN);
if (ret != 0) {
FDERROR << "hbSysFlushMem fails." << std::endl;
return false;
}
}
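// Submit the inference task to the BPU, wait for it to complete, then release the task handle.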
hbDNNTaskHandle_t task_handle = nullptr;
hbDNNInferCtrlParam infer_ctrl_param;
HB_DNN_INITIALIZE_INFER_CTRL_PARAM(&infer_ctrl_param);
RUNTIME_PROFILE_LOOP_BEGIN(1)
ret = hbDNNInfer(&task_handle,
&output_mems_,
input_mems_,
dnn_handle_,
&infer_ctrl_param);
RUNTIME_PROFILE_LOOP_END
if (ret != 0) {
FDERROR << "hbDNNInfer fails." << std::endl;
return false;
}
ret = hbDNNWaitTaskDone(task_handle, 0);
if (ret != 0) {
FDERROR << "hbDNNWaitTaskDone fails." << std::endl;
return false;
}
ret = hbDNNReleaseTask(task_handle);
if (ret != 0) {
FDERROR << "hbDNNReleaseTask fails." << std::endl;
return false;
}
// get result
outputs->resize(outputs_desc_.size());
std::vector<int64_t> temp_shape(4);
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
temp_shape.resize(outputs_desc_[i].shape.size());
for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) {
temp_shape[j] = outputs_desc_[i].shape[j];
}
(*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
outputs_desc_[i].name);
hbSysFlushMem(&(output_mems_[i].sysMem[0]), HB_SYS_MEM_CACHE_INVALIDATE);
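// Dequantize the raw output in place: SHIFT outputs are divided by 2^shift, SCALE outputs are multiplied by the reported scale factor.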
auto data = (float *)(output_mems_[i].sysMem[0].virAddr);
auto shift = output_mems_[i].properties.shift.shiftData;
auto scale = output_mems_[i].properties.scale.scaleData;
// iterate over elements, not bytes
for (int j = 0; j < (*outputs)[i].Numel(); j++) {
if (output_mems_[i].properties.quantiType == SHIFT) {
data[j] = data[j] / (1 << shift[j]);
} else if (output_mems_[i].properties.quantiType == SCALE) {
data[j] = data[j] * scale[j];
}
}
memcpy((*outputs)[i].MutableData(), (float*)output_mems_[i].sysMem[0].virAddr,
(*outputs)[i].Nbytes());
}
RUNTIME_PROFILE_LOOP_H2D_D2H_END
return true;
}
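For intuition on the dequantization step above, here is a tiny standalone sketch with made-up values (the shift and scale below are hypothetical, not taken from any real model): a SHIFT output maps a raw value of 40 with shift 3 to 40 / 2^3 = 5.0, and a SCALE output reaches the same 5.0 with scale 0.125.

#include <cstdio>

int main() {
  float raw = 40.0f;
  int shift = 3;         // hypothetical value, as reported via properties.shift.shiftData
  float scale = 0.125f;  // hypothetical value, as reported via properties.scale.scaleData
  printf("SHIFT dequant: %.3f\n", raw / (1 << shift));  // 5.000
  printf("SCALE dequant: %.3f\n", raw * scale);         // 5.000
  return 0;
}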
FDDataType HorizonBackend::HorizonTensorTypeToFDDataType(int32_t type){
if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_F16) {
return FDDataType::FP16;
}
if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_F32) {
return FDDataType::FP32;
}
if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_S8) {
return FDDataType::INT8;
}
if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_S16) {
return FDDataType::INT16;
}
if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_S32) {
return FDDataType::INT32;
}
if (type == hbDNNDataType::HB_DNN_TENSOR_TYPE_U8) {
return FDDataType::UINT8;
}
FDERROR << "FDDataType don't support this type" << std::endl;
return FDDataType::UNKNOWN1;
}
hbDNNDataType HorizonBackend::FDDataTypeToHorizonTensorType(FDDataType type){
if (type == FDDataType::FP16) {
return hbDNNDataType::HB_DNN_TENSOR_TYPE_F16;
}
if (type == FDDataType::FP32) {
return hbDNNDataType::HB_DNN_TENSOR_TYPE_F32;
}
if (type == FDDataType::INT8) {
return hbDNNDataType::HB_DNN_TENSOR_TYPE_S8;
}
if (type == FDDataType::INT16) {
return hbDNNDataType::HB_DNN_TENSOR_TYPE_S16;
}
if (type == FDDataType::INT32) {
return hbDNNDataType::HB_DNN_TENSOR_TYPE_S32;
}
if (type == FDDataType::UINT8) {
return hbDNNDataType::HB_DNN_TENSOR_TYPE_U8;
}
FDERROR << "horizon_tensor_type don't support this type" << std::endl;
return hbDNNDataType::HB_DNN_TENSOR_TYPE_MAX;
}
} //namespace fastdeploy
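To see how the backend is driven end to end, here is a minimal usage sketch (not part of this commit). It assumes the existing FastDeploy Runtime API (SetModelPath, Runtime::Init, Runtime::Infer) and a model compiled by the Horizon toolchain; the *.bin file name and the tensor filling are placeholders.

#include <vector>
#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.UseHorizon();            // Device::SUNRISENPU
  option.UseHorizonNPUBackend();  // Backend::HORIZONNPU
  option.SetModelPath("ppyoloe.bin", "", fastdeploy::ModelFormat::HORIZON);

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return -1;

  // One NHWC uint8 RGB tensor per model input; fill each with image data
  // shaped according to runtime.GetInputInfo(i) before calling Infer.
  std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
  std::vector<fastdeploy::FDTensor> outputs;
  if (!runtime.Infer(inputs, &outputs)) return -1;
  return 0;
}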

View File

@@ -0,0 +1,69 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "fastdeploy/runtime/backends/backend.h"
#include "fastdeploy/core/fd_tensor.h"
#include "dnn/hb_dnn.h"
namespace fastdeploy {
class HorizonBackend : public BaseBackend {
public:
HorizonBackend() = default;
~HorizonBackend();
// Horizon Backend implementation.
bool Init(const RuntimeOption& runtime_option);
int NumInputs() const override {
return static_cast<int>(inputs_desc_.size());
}
int NumOutputs() const override {
return static_cast<int>(outputs_desc_.size());
}
TensorInfo GetInputInfo(int index) override;
TensorInfo GetOutputInfo(int index) override;
std::vector<TensorInfo> GetInputInfos() override;
std::vector<TensorInfo> GetOutputInfos() override;
bool Infer(std::vector<FDTensor>& inputs, std::vector<FDTensor>* outputs,
bool copy_to_fd = true) override;
private:
hbPackedDNNHandle_t packed_dnn_handle_;
hbDNNHandle_t dnn_handle_;
hbDNNTensorProperties *input_properties_ = nullptr;
hbDNNTensorProperties *output_properties_ = nullptr;
hbDNNTensor *input_mems_ = nullptr;
hbDNNTensor *output_mems_ = nullptr;
bool infer_init_ = false;
std::vector<TensorInfo> inputs_desc_;
std::vector<TensorInfo> outputs_desc_;
bool GetModelInputOutputInfos();
bool LoadModel(const char *model);
static FDDataType HorizonTensorTypeToFDDataType(int32_t type);
static hbDNNDataType FDDataTypeToHorizonTensorType(FDDataType type);
};
} // namespace fastdeploy

View File

@@ -32,6 +32,8 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) {
out << "Backend::POROS"; out << "Backend::POROS";
} else if (backend == Backend::LITE) { } else if (backend == Backend::LITE) {
out << "Backend::PDLITE"; out << "Backend::PDLITE";
} else if(backend == Backend::HORIZONNPU){
out << "Backend::HORIZONNPU";
} else { } else {
out << "UNKNOWN-Backend"; out << "UNKNOWN-Backend";
} }
@@ -49,6 +51,9 @@ std::ostream& operator<<(std::ostream& out, const Device& d) {
case Device::RKNPU: case Device::RKNPU:
out << "Device::RKNPU"; out << "Device::RKNPU";
break; break;
case Device::SUNRISENPU:
out << "Device::SUNRISENPU";
break;
case Device::SOPHGOTPUD: case Device::SOPHGOTPUD:
out << "Device::SOPHGOTPUD"; out << "Device::SOPHGOTPUD";
break; break;
@@ -81,7 +86,10 @@ std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
out << "ModelFormat::SOPHGO"; out << "ModelFormat::SOPHGO";
} else if (format == ModelFormat::TORCHSCRIPT) { } else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT"; out << "ModelFormat::TORCHSCRIPT";
} else { } else if (format == ModelFormat::HORIZON) {
out << "ModelFormat::HORIZON";
}
else {
out << "UNKNOWN-ModelFormat"; out << "UNKNOWN-ModelFormat";
} }
return out; return out;
@@ -110,6 +118,9 @@ std::vector<Backend> GetAvailableBackends() {
#ifdef ENABLE_RKNPU2_BACKEND #ifdef ENABLE_RKNPU2_BACKEND
backends.push_back(Backend::RKNPU2); backends.push_back(Backend::RKNPU2);
#endif #endif
#ifdef ENABLE_HORIZON_BACKEND
backends.push_back(Backend::HORIZONNPU);
#endif
#ifdef ENABLE_SOPHGO_BACKEND #ifdef ENABLE_SOPHGO_BACKEND
backends.push_back(Backend::SOPHGOTPU); backends.push_back(Backend::SOPHGOTPU);
#endif #endif

View File

@@ -38,6 +38,7 @@ enum Backend {
  LITE,  ///< Paddle Lite, support Paddle format model, ARM CPU only
  RKNPU2,  ///< RKNPU2, support RKNN format model, Rockchip NPU only
  SOPHGOTPU,  ///< SOPHGOTPU, support SOPHGO format model, Sophgo TPU only
  HORIZONNPU,  ///< HORIZONNPU, support Horizon format model, Horizon NPU
};
/**
@@ -60,7 +61,8 @@ enum FASTDEPLOY_DECL Device {
  KUNLUNXIN,
  ASCEND,
  SOPHGOTPUD,
  DIRECTML,
  SUNRISENPU,
};
/*! Deep learning model format */
@@ -71,6 +73,7 @@ enum ModelFormat {
  RKNN,  ///< Model with RKNN format
  TORCHSCRIPT,  ///< Model with TorchScript format
  SOPHGO,  ///< Model with SOPHGO format
  HORIZON,  ///< Model with HORIZON format
};
/// Describle all the supported backends for specified model format
@@ -80,6 +83,7 @@ static std::map<ModelFormat, std::vector<Backend>>
         Backend::ORT, Backend::OPENVINO, Backend::TRT}},
    {ModelFormat::ONNX, {Backend::ORT, Backend::OPENVINO, Backend::TRT}},
    {ModelFormat::RKNN, {Backend::RKNPU2}},
    {ModelFormat::HORIZON, {Backend::HORIZONNPU}},
    {ModelFormat::TORCHSCRIPT, {Backend::POROS}},
    {ModelFormat::SOPHGO, {Backend::SOPHGOTPU}}
};
@@ -91,6 +95,7 @@ static std::map<Device, std::vector<Backend>>
         Backend::OPENVINO, Backend::POROS}},
    {Device::GPU, {Backend::PDINFER, Backend::ORT, Backend::TRT, Backend::POROS}},
    {Device::RKNPU, {Backend::RKNPU2}},
    {Device::SUNRISENPU, {Backend::HORIZONNPU}},
    {Device::IPU, {Backend::PDINFER}},
    {Device::TIMVX, {Backend::LITE}},
    {Device::KUNLUNXIN, {Backend::LITE}},

View File

@@ -49,6 +49,10 @@
#include "fastdeploy/runtime/backends/sophgo/sophgo_backend.h" #include "fastdeploy/runtime/backends/sophgo/sophgo_backend.h"
#endif #endif
#ifdef ENABLE_HORIZON_BACKEND
#include "fastdeploy/runtime/backends/horizon/horizon_backend.h"
#endif
namespace fastdeploy { namespace fastdeploy {
bool AutoSelectBackend(RuntimeOption& option) { bool AutoSelectBackend(RuntimeOption& option) {
@@ -155,7 +159,10 @@ bool Runtime::Init(const RuntimeOption& _option) {
CreateSophgoNPUBackend(); CreateSophgoNPUBackend();
} else if (option.backend == Backend::POROS) { } else if (option.backend == Backend::POROS) {
CreatePorosBackend(); CreatePorosBackend();
} else { } else if (option.backend == Backend::HORIZONNPU){
CreateHorizonBackend();
}
else {
std::string msg = Str(GetAvailableBackends()); std::string msg = Str(GetAvailableBackends());
FDERROR << "The compiled FastDeploy only supports " << msg << ", " FDERROR << "The compiled FastDeploy only supports " << msg << ", "
<< option.backend << " is not supported now." << std::endl; << option.backend << " is not supported now." << std::endl;
@@ -335,6 +342,19 @@ void Runtime::CreateRKNPU2Backend() {
<< "." << std::endl; << "." << std::endl;
} }
void Runtime::CreateHorizonBackend(){
#ifdef ENABLE_HORIZON_BACKEND
backend_ = utils::make_unique<HorizonBackend>();
FDASSERT(backend_->Init(option), "Failed to initialize Horizon backend.");
#else
FDASSERT(false,
"HorizonBackend is not available, please compiled with ",
" ENABLE_HORIZON_BACKEND=ON.");
#endif
FDINFO << "Runtime initialized with Backend::HORIZONNPU in " << option.device
<< "." << std::endl;
}
void Runtime::CreateSophgoNPUBackend() { void Runtime::CreateSophgoNPUBackend() {
#ifdef ENABLE_SOPHGO_BACKEND #ifdef ENABLE_SOPHGO_BACKEND
backend_ = utils::make_unique<SophgoBackend>(); backend_ = utils::make_unique<SophgoBackend>();

View File

@@ -115,6 +115,7 @@ struct FASTDEPLOY_DECL Runtime {
  void CreateOpenVINOBackend();
  void CreateLiteBackend();
  void CreateRKNPU2Backend();
  void CreateHorizonBackend();
  void CreateSophgoNPUBackend();
  void CreatePorosBackend();
  std::unique_ptr<BaseBackend> backend_;

View File

@@ -65,6 +65,10 @@ void RuntimeOption::UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name,
  device = Device::RKNPU;
}

void RuntimeOption::UseHorizon() {
  device = Device::SUNRISENPU;
}

void RuntimeOption::UseTimVX() {
  device = Device::TIMVX;
  paddle_lite_option.device = device;
@@ -185,6 +189,14 @@ void RuntimeOption::UseLiteBackend() {
#endif
}

void RuntimeOption::UseHorizonNPUBackend() {
#ifdef ENABLE_HORIZON_BACKEND
  backend = Backend::HORIZONNPU;
#else
  FDASSERT(false, "FastDeploy was not compiled with the Horizon backend.");
#endif
}

void RuntimeOption::SetPaddleMKLDNN(bool pd_mkldnn) {
  FDWARNING << "`RuntimeOption::SetPaddleMKLDNN` will be removed in v1.2.0, "
               "please modify its member variable directly, e.g "

View File

@@ -73,6 +73,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
                     fastdeploy::rknpu2::CpuName::RK356X,
                 fastdeploy::rknpu2::CoreMask rknpu2_core =
                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO);
  /// Use Horizon NPU to inference
  void UseHorizon();
  /// Use TimVX e.g RV1126/A311D to inference
  void UseTimVX();
  /// Use Huawei Ascend to inference
@@ -277,6 +279,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
  void SetOrtGraphOptLevel(int level = -1);
  void UsePaddleBackend();
  void UseLiteBackend();
  void UseHorizonNPUBackend();
};

}  // namespace fastdeploy

View File

@@ -42,6 +42,7 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file,
    valid_gpu_backends = {Backend::ORT, Backend::TRT};
    valid_rknpu_backends = {Backend::RKNPU2};
    valid_directml_backends = {Backend::ORT};
    valid_horizon_backends = {Backend::HORIZONNPU};
  }
  runtime_option = custom_option;

View File

@@ -99,6 +99,7 @@ class FASTDEPLOY_DECL PPYOLOE : public PPDetBase {
    valid_rknpu_backends = {Backend::RKNPU2};
    valid_ascend_backends = {Backend::LITE};
    valid_sophgonpu_backends = {Backend::SOPHGOTPU};
    valid_horizon_backends = {Backend::HORIZONNPU};
    initialized = Initialize();
  }