mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-27 21:02:24 +08:00
[Backend] Add Huawei Ascend NPU deploy using PaddleLite CANN. (#757)
* Add Huawei Ascend NPU deploy through PaddleLite CANN
* Add NNAdapter interface for paddlelite
* Modify Huawei Ascend Cmake
* Update way for compiling Huawei Ascend NPU deployment
* Remove UseLiteBackend in UseCANN
* Support compiling the Python wheel
* Change names of nnadapter API
* Add nnadapter pybind and remove useless API
* Support Python deployment on Huawei Ascend NPU
* Add model support for ascend
* Add PPOCR rec resize for ascend
* Fix conflict for ascend
* Rename CANN to Ascend
* Fix ascend bug
* Move ascend python demo
* Improve ascend docs
* Improve ascend
@@ -64,6 +64,7 @@ option(ENABLE_LITE_BACKEND "Whether to enable paddle lite backend." OFF)
option(ENABLE_VISION "Whether to enable vision models usage." OFF)
option(ENABLE_TEXT "Whether to enable text models usage." OFF)
option(ENABLE_FLYCV "Whether to enable flycv to boost image preprocess." OFF)
option(WITH_ASCEND "Whether to compile for Huawei Ascend deploy." OFF)
option(WITH_TIMVX "Whether to compile for TIMVX deploy." OFF)
option(WITH_XPU "Whether to compile for KunlunXin XPU deploy." OFF)
option(WITH_TESTING "Whether to compile with unittest." OFF)
@@ -143,9 +144,12 @@ if (WITH_TIMVX)
  include(${PROJECT_SOURCE_DIR}/cmake/timvx.cmake)
endif()

if (WITH_ASCEND)
  include(${PROJECT_SOURCE_DIR}/cmake/ascend.cmake)
endif()

if (WITH_XPU)
  if(NOT ENABLE_LITE_BACKEND)
    message(WARNING "While compiling with -DWITH_XPU=ON, will force to set -DENABLE_LITE_BACKEND=ON")
    set(ENABLE_LITE_BACKEND ON)
  endif()
  if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64")
@@ -156,7 +160,6 @@ if (WITH_XPU)
  endif()
endif()

if(ANDROID OR IOS)
  if(ENABLE_ORT_BACKEND)
    message(FATAL_ERROR "Not support ONNXRuntime backend for Andorid/IOS now. Please set ENABLE_ORT_BACKEND=OFF.")
@@ -617,6 +620,13 @@ else()
  )
endif()

if(WITH_ASCEND)
  install(
    FILES ${PROJECT_SOURCE_DIR}/scripts/ascend_init.sh
    DESTINATION ${CMAKE_INSTALL_PREFIX}
  )
endif()

############################### Building: FastDeploy Python Wheel #############################
if(BUILD_FASTDEPLOY_PYTHON)
  add_definitions(-DBUILD_FASTDEPLOY_PYTHON)
28
cmake/ascend.cmake
Normal file
@@ -0,0 +1,28 @@
if(NOT CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
  message(FATAL_ERROR "Huawei Ascend NPU is only supported on the Linux aarch64 platform for now.")
endif()

if(NOT ${ENABLE_LITE_BACKEND})
  set(ENABLE_LITE_BACKEND ON)
endif()

if (NOT BUILD_FASTDEPLOY_PYTHON)
  message(STATUS "Build FastDeploy Ascend C++ library.")
  if(NOT PADDLELITE_URL)
    set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/test/lite-linux_arm64_huawei_ascend_npu_1121.tgz")
  endif()
else ()
  message(STATUS "Build FastDeploy Ascend Python library.")
  if(NOT PADDLELITE_URL)
    set(PADDLELITE_URL "https://bj.bcebos.com/fastdeploy/test/lite-linux_arm64_huawei_ascend_npu_python_1207.tgz")
  endif()
  # Reset the rpath of the prebuilt Paddle Lite libraries so they resolve from their own directory.
  execute_process(COMMAND sh -c "ls *.so*" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/paddlelite/lib
                  COMMAND sh -c "xargs ${PATCHELF_EXE} --set-rpath '$ORIGIN'" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/paddlelite/lib
                  RESULT_VARIABLE result
                  OUTPUT_VARIABLE curr_out
                  ERROR_VARIABLE curr_out)
  if(result EQUAL "1")
    message(FATAL_ERROR "Failed to patchelf Paddle Lite libraries when using Ascend.")
  endif()
  message(STATUS "result:${result} out:${curr_out}")
endif()
@@ -37,6 +37,7 @@ function(fastdeploy_summary)
  message(STATUS "  ENABLE_POROS_BACKEND    : ${ENABLE_POROS_BACKEND}")
  message(STATUS "  ENABLE_TRT_BACKEND      : ${ENABLE_TRT_BACKEND}")
  message(STATUS "  ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
  message(STATUS "  WITH_ASCEND             : ${WITH_ASCEND}")
  message(STATUS "  WITH_TIMVX              : ${WITH_TIMVX}")
  message(STATUS "  WITH_XPU                : ${WITH_XPU}")
  if(ENABLE_ORT_BACKEND)
@@ -12,6 +12,7 @@
- [Build and Install the Deployment Environment for Rockchip RV1126](cn/build_and_install/rv1126.md)
- [Build and Install the Deployment Environment for Rockchip RK3588](cn/build_and_install/rknpu2.md)
- [Build and Install the Deployment Environment for Amlogic A311D](cn/build_and_install/a311d.md)
- [Build and Install the Deployment Environment for Huawei Ascend](cn/build_and_install/huawei_ascend.md)
- [Build and Install the Deployment Environment for Jetson](cn/build_and_install/jetson.md)
- [Build and Install the Deployment Environment for Android](cn/build_and_install/android.md)
- [Build and Install the Serving Deployment Image](../serving/docs/zh_CN/compile.md)
48
docs/README_EN.md
Executable file
@@ -0,0 +1,48 @@
[简体中文](README_CN.md)| English

# Tutorials

## Install

- [Install FastDeploy Prebuilt Libraries](en/build_and_install/download_prebuilt_libraries.md)
- [Build and Install FastDeploy Library on GPU Platform](en/build_and_install/gpu.md)
- [Build and Install FastDeploy Library on CPU Platform](en/build_and_install/cpu.md)
- [Build and Install FastDeploy Library on IPU Platform](en/build_and_install/ipu.md)
- [Build and Install FastDeploy Library on KunlunXin XPU Platform](en/build_and_install/xpu.md)
- [Build and Install on RV1126 Platform](en/build_and_install/rv1126.md)
- [Build and Install on RK3588 Platform](en/build_and_install/rknpu2.md)
- [Build and Install on A311D Platform](en/build_and_install/a311d.md)
- [Build and Install on Huawei Ascend Platform](en/build_and_install/huawei_ascend.md)
- [Build and Install FastDeploy Library on Nvidia Jetson Platform](en/build_and_install/jetson.md)
- [Build and Install FastDeploy Library on Android Platform](en/build_and_install/android.md)
- [Build and Install FastDeploy Serving Deployment Image](../serving/docs/EN/compile-en.md)

## A Quick Start - Demos

- [Python Deployment Demo](en/quick_start/models/python.md)
- [C++ Deployment Demo](en/quick_start/models/cpp.md)
- [A Quick Start on Runtime Python](en/quick_start/runtime/python.md)
- [A Quick Start on Runtime C++](en/quick_start/runtime/cpp.md)

## API

- [Python API](https://baidu-paddle.github.io/fastdeploy-api/python/html/)
- [C++ API](https://baidu-paddle.github.io/fastdeploy-api/cpp/html/)
- [Android Java API](../java/android)

## Performance Optimization

- [Quantization Acceleration](en/quantize.md)

## Frequent Q&As

- [1. How to Change Inference Backends](en/faq/how_to_change_backend.md)
- [2. How to Use FastDeploy C++ SDK on Windows Platform](en/faq/use_sdk_on_windows.md)
- [3. How to Use FastDeploy C++ SDK on Android Platform](en/faq/use_cpp_sdk_on_android.md)
- [4. Tricks of TensorRT](en/faq/tensorrt_tricks.md)
- [5. How to Develop a New Model](en/faq/develop_a_new_model.md)

## More FastDeploy Deployment Module

- [Deployment AI Model as a Service](../serving)
- [Benchmark Testing](../benchmark)
@@ -15,6 +15,7 @@
- [Rockchip RK3588 Deployment Environment](rknpu2.md)
- [Amlogic A311D Deployment Environment](a311d.md)
- [KunlunXin XPU Deployment Environment](xpu.md)
- [Huawei Ascend Deployment Environment](huawei_ascend.md)

## FastDeploy Build Options

@@ -25,6 +26,7 @@
| ENABLE_PADDLE_BACKEND | Default OFF; whether to build with the Paddle Inference backend (recommended ON for CPU/GPU) |
| ENABLE_LITE_BACKEND | Default OFF; whether to build with the Paddle Lite backend (must be ON when building the Android library) |
| ENABLE_RKNPU2_BACKEND | Default OFF; whether to build with the RKNPU2 backend (recommended ON for RK3588/RK3568/RK3566) |
| WITH_ASCEND | Default OFF; set to ON when deploying on Huawei Ascend NPU |
| WITH_XPU | Default OFF; set to ON when deploying on KunlunXin XPU |
| WITH_TIMVX | Default OFF; set to ON when deploying on RV1126/RV1109/A311D |
| ENABLE_TRT_BACKEND | Default OFF; whether to build with the TensorRT backend (recommended ON for GPU) |
99
docs/cn/build_and_install/huawei_ascend.md
Normal file
@@ -0,0 +1,99 @@
# How to Build the Huawei Ascend NPU Deployment Environment

Based on the Paddle Lite backend, FastDeploy supports inference deployment on Huawei Ascend NPUs.
For more details, please refer to: [Paddle Lite Deployment Example](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/demo_guides/huawei_ascend_npu.md).

This document describes how to compile the Paddle Lite based C++ and Python FastDeploy source code on ARM Linux to generate prediction libraries targeting the Huawei Ascend NPU.

For more build options, please refer to the [FastDeploy build options description](./README.md).

## 1. Huawei Ascend Environment Preparation
- Atlas 300I Pro inference card; see the [spec sheet](https://e.huawei.com/cn/products/cloud-computing-dc/atlas/atlas-300i-pro) for details
- Install the driver and firmware packages for the Atlas 300I Pro inference card
- Download the matching driver and firmware packages:
  - https://www.hiascend.com/hardware/firmware-drivers?tag=community (Community Edition)
  - https://www.hiascend.com/hardware/firmware-drivers?tag=commercial (Commercial Edition)
  - Driver: Atlas-300i-pro-npu-driver_5.1.rc2_linux-aarch64.run
  - Firmware: Atlas-300i-pro-npu-firmware_5.1.rc2.run
- Install the driver and firmware packages:

```shell
# Add execution permissions
$ chmod +x *.run
# Install the driver and firmware packages
$ ./Atlas-300i-pro-npu-driver_5.1.rc2_linux-aarch64.run --full
$ ./Atlas-300i-pro-npu-firmware_5.1.rc2.run --full
# Reboot the server
$ reboot
# Check the driver information to confirm the installation succeeded
$ npu-smi info
```
- More system and detailed information is available in the [Ascend Hardware Product Documentation](https://www.hiascend.com/document?tag=hardware)

## 2. Setting Up the Build Environment

### Host environment requirements
- OS: ARM-Linux
- gcc, g++, git, make, wget, python, pip, python-dev, patchelf
- cmake (version 3.10 or above recommended)

### Using the Docker development environment
To stay consistent with the build environment verified by FastDeploy, it is recommended to use the Docker development environment.

```shell
# Download the Dockerfile
$ wget https://bj.bcebos.com/fastdeploy/test/Ascend_ubuntu18.04_aarch64_5.1.rc2.Dockerfile
# Build the image from the Dockerfile
$ docker build --network=host -f Ascend_ubuntu18.04_aarch64_5.1.rc2.Dockerfile -t paddlelite/ascend_aarch64:cann_5.1.rc2 .
# Create the container
$ docker run -itd --privileged --name=ascend-aarch64 --net=host -v $PWD:/Work -w /Work --device=/dev/davinci0 --device=/dev/davinci_manager --device=/dev/hisi_hdc --device /dev/devmm_svm -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi -v /usr/local/Ascend/driver/:/usr/local/Ascend/driver/ paddlelite/ascend_aarch64:cann_5.1.rc2 /bin/bash
# Enter the container
$ docker exec -it ascend-aarch64 /bin/bash
# Verify that the Ascend environment inside the container was created successfully
$ npu-smi info
```
After the steps above succeed, FastDeploy can be compiled directly inside the docker container.

Note:
- To use a different CANN version inside Docker, update the CANN download path in the Dockerfile and update the corresponding driver and firmware. The current Dockerfile defaults to [CANN 5.1.RC2](https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%205.1.RC2/Ascend-cann-toolkit_5.1.RC2_linux-aarch64.run).
- If you prefer not to use docker, follow the [build environment preparation for ARM Linux](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/source_compile/arm_linux_compile_arm_linux.rst) provided by Paddle Lite to configure your own build environment, and then download and install the matching CANN packages to complete the configuration.

## 3. Building the C++ FastDeploy Library Based on Paddle Lite
Once the build environment is ready, the build commands are as follows:
```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build

# CMake configuration with Ascend
cmake -DWITH_ASCEND=ON \
      -DCMAKE_INSTALL_PREFIX=fastdeploy-ascend \
      -DENABLE_VISION=ON \
      ..

# Build FastDeploy Ascend C++ SDK
make -j8
make install
```
After compilation, a fastdeploy-ascend directory is created under the current build directory, which means the Paddle Lite based FastDeploy library has been built.

## 4. Building the Python FastDeploy Library Based on Paddle Lite
Once the build environment is ready, the build commands are as follows:
```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
export WITH_ASCEND=ON
export ENABLE_VISION=ON

python setup.py build
python setup.py bdist_wheel

# After compilation, install the whl package from the dist folder of the current directory.
```

- To deploy PaddleClas classification models on Huawei Ascend NPUs with C++, please refer to: [PaddleClas Huawei Ascend NPU C++ Deployment Example](../../../examples/vision/classification/paddleclas/ascend/cpp/README.md)
- To deploy PaddleClas classification models on Huawei Ascend NPUs with Python, please refer to the example linked below, and see the sketch that follows this list for a quick orientation: [PaddleClas Huawei Ascend NPU Python Deployment Example](../../../examples/vision/classification/paddleclas/ascend/python/README.md)
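Once the wheel built above is installed, a minimal Python sketch of Ascend inference might look like the following. This is illustrative only, not part of the repository: the model directory layout, image file name, and the `PaddleClasModel` constructor arguments are assumptions based on the PaddleClas example referenced above and the `use_ascend()` option added by this commit.

```python
# Hypothetical usage sketch; file paths and model names are placeholders.
import cv2
import fastdeploy as fd

# Ask FastDeploy to run the model on the Huawei Ascend NPU via the
# Paddle Lite backend (use_ascend() is the option added by this commit).
option = fd.RuntimeOption()
option.use_ascend()

# Assumed layout of an exported PaddleClas inference model directory.
model = fd.vision.classification.PaddleClasModel(
    "ResNet50_vd_infer/inference.pdmodel",
    "ResNet50_vd_infer/inference.pdiparams",
    "ResNet50_vd_infer/inference_cls.yaml",
    runtime_option=option)

im = cv2.imread("ILSVRC2012_val_00000010.jpeg")
result = model.predict(im)
print(result)
```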
21
docs/cn/faq/use_sdk_on_ascend.md
Normal file
@@ -0,0 +1,21 @@
[English](../../en/faq/use_sdk_on_linux.md) | 中文

# Deploying with C++ on Huawei Ascend under Linux

After the deployment example has been compiled, and before running the program, a few environment variables must be exported to initialize the deployment environment, because the Huawei Ascend toolkit is required at runtime.
You can simply use the following script (located in the directory of the compiled FastDeploy library) to initialize the Huawei Ascend deployment environment.

```
# The default paths of the Ascend toolkit are as follows,
# HUAWEI_ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest"
# HUAWEI_ASCEND_DRIVER_PATH="/usr/local/Ascend/driver"
# If your installation directories differ from these, export them manually first.
# export HUAWEI_ASCEND_TOOLKIT_HOME="Your_ascend_toolkit_path"
# export HUAWEI_ASCEND_DRIVER_PATH="Your_ascend_driver_path"

source fastdeploy-ascend/fastdeploy_init.sh
```

Note that this command only takes effect in the current shell session (it will not apply after switching to a new terminal window, or after closing and reopening the window). To make it persistent system-wide, add these environment variables to the `~/.bashrc` file.
@@ -28,9 +28,10 @@ English | [中文](../../cn/build_and_install/README.md)
| ENABLE_OPENVINO_BACKEND | Default OFF, whether to enable OpenVINO backend (CPU) |
| ENABLE_VISION | Default OFF, whether to enable vision models deployment module |
| ENABLE_TEXT | Default OFF, whether to enable text models deployment module |
| WITH_GPU | Default OFF, if build on GPU, this need to be ON |
| WITH_XPU | Default OFF, if deploy on KunlunXin XPU, this need to be ON |
| WITH_TIMVX | Default OFF, if deploy on RV1126/RV1109/A311D, this need to be ON |
| WITH_GPU | Default OFF, if build on GPU, this needs to be ON |
| WITH_XPU | Default OFF, if deploy on KunlunXin XPU, this needs to be ON |
| WITH_TIMVX | Default OFF, if deploy on RV1126/RV1109/A311D, this needs to be ON |
| WITH_ASCEND | Default OFF, if deploy on Huawei Ascend, this needs to be ON |
| CUDA_DIRECTORY | Default /usr/local/cuda, if build on GPU, this defines the path of CUDA(>=11.2) |
| TRT_DIRECTORY | If build with ENABLE_TRT_BACKEND=ON, this defines the path of TensorRT(>=8.4) |
| ORT_DIRECTORY | [Optional] If build with ENABLE_ORT_BACKEND=ON, this flag defines the path of ONNX Runtime, but if this flag is not set, it will download ONNX Runtime library automatically |
98
docs/en/build_and_install/huawei_ascend.md
Normal file
@@ -0,0 +1,98 @@
# How to Build the Huawei Ascend Deployment Environment

Based on the Paddle-Lite backend, FastDeploy supports model inference on Huawei's Ascend NPU.
For more detailed information, please refer to: [Paddle Lite Deployment Example](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/demo_guides/huawei_ascend_npu.md).

This document describes how to compile the C++ and Python FastDeploy source code under an ARM Linux OS environment to generate prediction libraries targeting the Huawei Ascend NPU.

For more compilation options, please refer to the [FastDeploy compilation options description](./README.md)

## Huawei Ascend Environment Preparation
- Atlas 300I Pro, see details in the [Spec Sheet](https://e.huawei.com/cn/products/cloud-computing-dc/atlas/atlas-300i-pro)
- Install the driver and firmware package (Driver and Firmware) for the Atlas 300I Pro
- Download the matching driver and firmware package at:
  - https://www.hiascend.com/hardware/firmware-drivers?tag=community (Community Edition)
  - https://www.hiascend.com/hardware/firmware-drivers?tag=commercial (Commercial Edition)
  - Driver: Atlas-300i-pro-npu-driver_5.1.rc2_linux-aarch64.run
  - Firmware: Atlas-300i-pro-npu-firmware_5.1.rc2.run
- Install the driver and firmware packages:

```shell
$ chmod +x *.run

$ ./Atlas-300i-pro-npu-driver_5.1.rc2_linux-aarch64.run --full
$ ./Atlas-300i-pro-npu-firmware_5.1.rc2.run --full

$ reboot
# Check the driver information to confirm successful installation
$ npu-smi info
```
- More system and detailed information is available in the [Ascend Hardware Product Documentation](https://www.hiascend.com/document?tag=hardware)

## Compilation environment construction

### Host environment requirements
- OS: ARM-Linux
- gcc, g++, git, make, wget, python, pip, python-dev, patchelf
- cmake (version 3.10 or above recommended)

### Using the Docker development environment
In order to ensure consistency with the FastDeploy verified build environment, it is recommended to use the Docker development environment for configuration.

```shell
# Download the Dockerfile
$ wget https://bj.bcebos.com/fastdeploy/test/Ascend_ubuntu18.04_aarch64_5.1.rc2.Dockerfile
# Create the docker image
$ docker build --network=host -f Ascend_ubuntu18.04_aarch64_5.1.rc2.Dockerfile -t paddlelite/ascend_aarch64:cann_5.1.rc2 .
# Create the container
$ docker run -itd --privileged --name=ascend-aarch64 --net=host -v $PWD:/Work -w /Work --device=/dev/davinci0 --device=/dev/davinci_manager --device=/dev/hisi_hdc --device /dev/devmm_svm -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi -v /usr/local/Ascend/driver/:/usr/local/Ascend/driver/ paddlelite/ascend_aarch64:cann_5.1.rc2 /bin/bash
# Enter the container
$ docker exec -it ascend-aarch64 /bin/bash
# Verify that the Ascend environment for the container is created successfully
$ npu-smi info
```
Once the above steps are successful, the user can start compiling FastDeploy directly from within docker.

Note:
- If you want to use another CANN version in Docker, please update the CANN download path in the Dockerfile, and update the corresponding driver and firmware. The current default in the Dockerfile is [CANN 5.1.RC2](https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%205.1.RC2/Ascend-cann-toolkit_5.1.RC2_linux-aarch64.run).
- If you do not want to use docker, you can refer to the [Compile Environment Preparation for ARM Linux Environments](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/docs/source_compile/arm_linux_compile_arm_linux.rst) provided by Paddle Lite to configure your own compilation environment, and then download and install the proper CANN packages to complete the configuration.

## C++ FastDeploy library compilation based on Paddle Lite
After setting up the compilation environment, the compilation command is as follows.

```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy
mkdir build && cd build

# CMake configuration with Ascend
cmake -DWITH_ASCEND=ON \
      -DCMAKE_INSTALL_PREFIX=fastdeploy-ascend \
      -DENABLE_VISION=ON \
      ..

# Build FastDeploy Ascend C++ SDK
make -j8
make install
```
When the compilation is complete, the fastdeploy-ascend directory is created in the current build directory, indicating that the FastDeploy library has been compiled.

## Compiling Python FastDeploy Libraries Based on Paddle Lite

```bash
# Download the latest source code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/python
export WITH_ASCEND=ON
export ENABLE_VISION=ON

python setup.py build
python setup.py bdist_wheel

# After the compilation is complete, please install the whl package in the dist folder of the current directory.
```

To deploy the PaddleClas classification model on Huawei Ascend NPU using C++, please refer to: [PaddleClas Huawei Ascend NPU C++ Deployment Example](../../../examples/vision/classification/paddleclas/ascend/cpp/README.md)

To deploy the PaddleClas classification model on Huawei Ascend NPU using Python, please refer to: [PaddleClas Huawei Ascend NPU Python Deployment Example](../../../examples/vision/classification/paddleclas/ascend/python/README.md)
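Beyond `use_ascend()`, this commit also exposes the Paddle Lite NNAdapter tuning knobs to Python (`set_lite_model_cache_dir`, `set_lite_subgraph_partition_path`, and related setters shown later in the diff). A hedged sketch of how they might be combined is given below; the cache directory and config file paths are placeholders, not files shipped with FastDeploy.

```python
# Illustrative only: the paths below are placeholders you would supply yourself.
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_ascend()  # target the Huawei Ascend NPU via the Paddle Lite backend

# Cache the NNAdapter-generated device models so later runs can skip rebuilding them.
option.set_lite_model_cache_dir("./ascend_model_cache")

# Optionally keep specific operators on the host with a subgraph partition config file.
option.set_lite_subgraph_partition_path("./subgraph_partition.txt")
```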
21
docs/en/faq/use_sdk_on_ascend.md
Normal file
@@ -0,0 +1,21 @@
[简体中文](../../cn/faq/use_sdk_on_linux.md) | English

# Linux deployment with C++ on Huawei Ascend

After the deployment example is compiled, and before running the program, we need to import some environment variables to initialize the deployment environment, because the Huawei Ascend toolkit is required at runtime.
Users can use the following script (located in the directory of the compiled FastDeploy library) to initialize the Huawei Ascend deployment environment.

```
# The default paths of the Ascend Toolkit are as follows,
# HUAWEI_ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest"
# HUAWEI_ASCEND_DRIVER_PATH="/usr/local/Ascend/driver"
# If your installation directories differ from these, export them manually first.
# export HUAWEI_ASCEND_TOOLKIT_HOME="Your_ascend_toolkit_path"
# export HUAWEI_ASCEND_DRIVER_PATH="Your_ascend_driver_path"

source fastdeploy-ascend/fastdeploy_init.sh
```

Note that this command only takes effect in the current shell session (switching to a new terminal window, or closing the window and reopening it, will not work). If you need it to persist on the system, add these environment variables to the `~/.bashrc` file.
@@ -34,11 +34,16 @@ wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/Ima
./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 3
# KunlunXin XPU inference
./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 4
# Huawei Ascend NPU inference
./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 5
```

The commands above only work on Linux or MacOS. For how to use the FastDeploy C++ SDK on Windows, please refer to:
- [How to use the FastDeploy C++ SDK on Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)

If you deploy with a Huawei Ascend NPU, please initialize the deployment environment before running, as described in:
- [How to deploy with Huawei Ascend NPU](../../../../../docs/cn/faq/use_sdk_on_ascend.md)

## PaddleClas C++ Interface

### PaddleClas Class
@@ -148,6 +148,31 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file) {
  std::cout << res.Str() << std::endl;
}

void AscendInfer(const std::string& model_dir, const std::string& image_file) {
  auto model_file = model_dir + sep + "inference.pdmodel";
  auto params_file = model_dir + sep + "inference.pdiparams";
  auto config_file = model_dir + sep + "inference_cls.yaml";

  auto option = fastdeploy::RuntimeOption();
  option.UseAscend();

  auto model = fastdeploy::vision::classification::PaddleClasModel(
      model_file, params_file, config_file, option);

  assert(model.Initialized());

  auto im = cv::imread(image_file);

  fastdeploy::vision::ClassifyResult res;
  if (!model.Predict(&im, &res)) {
    std::cerr << "Failed to predict." << std::endl;
    return;
  }

  std::cout << res.Str() << std::endl;
}

int main(int argc, char* argv[]) {
  if (argc < 4) {
    std::cout << "Usage: infer_demo path/to/model path/to/image run_option, "
@@ -169,6 +194,8 @@ int main(int argc, char* argv[]) {
    IpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 4) {
    XpuInfer(argv[1], argv[2]);
  } else if (std::atoi(argv[3]) == 5) {
    AscendInfer(argv[1], argv[2]);
  }
  return 0;
}
@@ -27,6 +27,8 @@ python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg -
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device ipu --topk 1
# XPU inference
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device xpu --topk 1
# Huawei Ascend NPU inference
python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device ascend --topk 1
```

The result returned after running is shown below.

@@ -17,7 +17,8 @@ def parse_arguments():
        "--device",
        type=str,
        default='cpu',
        help="Type of inference device, support 'cpu' or 'gpu' or 'ipu'.")
        help="Type of inference device, support 'cpu' or 'gpu' or 'ipu' or 'xpu' or 'ascend'."
    )
    parser.add_argument(
        "--use_trt",
        type=ast.literal_eval,
@@ -38,6 +39,9 @@ def build_option(args):
    if args.device.lower() == "xpu":
        option.use_xpu()

    if args.device.lower() == "ascend":
        option.use_ascend()

    if args.use_trt:
        option.use_trt_backend()
    return option
@@ -91,6 +91,42 @@ void LiteBackend::BuildOption(const LiteBackendOption& option) {
        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
  }

  if (option_.enable_ascend) {
    // Route the model to the Huawei Ascend NPU through Paddle Lite's NNAdapter.
    if (option_.nnadapter_device_names.empty()) {
      config_.set_nnadapter_device_names({"huawei_ascend_npu"});
    } else {
      config_.set_nnadapter_device_names(option_.nnadapter_device_names);
    }

    if (!option_.nnadapter_context_properties.empty()) {
      config_.set_nnadapter_context_properties(option_.nnadapter_context_properties);
    }

    if (!option_.nnadapter_model_cache_dir.empty()) {
      config_.set_nnadapter_model_cache_dir(option_.nnadapter_model_cache_dir);
    }

    if (!option_.nnadapter_mixed_precision_quantization_config_path.empty()) {
      config_.set_nnadapter_mixed_precision_quantization_config_path(
          option_.nnadapter_mixed_precision_quantization_config_path);
    }

    if (!option_.nnadapter_subgraph_partition_config_path.empty()) {
      config_.set_nnadapter_subgraph_partition_config_path(
          option_.nnadapter_subgraph_partition_config_path);
    }

    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
  }

  if (option_.enable_xpu) {
    valid_places.push_back(
        paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
@@ -44,7 +44,15 @@ struct LiteBackendOption {
  // TODO(qiuyanjun): support more options for lite backend.
  // Such as fp16, different device target (kARM/kXPU/kNPU/...)
  std::string nnadapter_subgraph_partition_config_path = "";
  std::string nnadapter_subgraph_partition_config_buffer = "";
  std::string nnadapter_context_properties = "";
  std::string nnadapter_model_cache_dir = "";
  std::string nnadapter_mixed_precision_quantization_config_path = "";
  std::map<std::string, std::vector<std::vector<int64_t>>>
      nnadapter_dynamic_shape_info = {{"", {{0}}}};
  std::vector<std::string> nnadapter_device_names = {};
  bool enable_timvx = false;
  bool enable_ascend = false;
  bool enable_xpu = false;
  int device_id = 0;
  int xpu_l3_workspace_size = 0xfffc00;
@@ -62,6 +62,9 @@ std::string Str(const Device& d) {
    case Device::TIMVX:
      out = "Device::TIMVX";
      break;
    case Device::ASCEND:
      out = "Device::ASCEND";
      break;
    case Device::XPU:
      out = "Device::XPU";
      break;
@@ -88,6 +91,9 @@ std::ostream& operator<<(std::ostream& out,const Device& d){
    case Device::XPU:
      out << "Device::XPU";
      break;
    case Device::ASCEND:
      out << "Device::ASCEND";
      break;
    default:
      out << "Device::UNKOWN";
  }

@@ -22,7 +22,7 @@

namespace fastdeploy {

enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU, TIMVX, XPU};
enum FASTDEPLOY_DECL Device { CPU, GPU, RKNPU, IPU, TIMVX, XPU, ASCEND};

FASTDEPLOY_DECL std::string Str(const Device& d);
||||
|
@@ -51,6 +51,7 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
|
||||
bool use_ipu = (runtime_option.device == Device::IPU);
|
||||
bool use_rknpu = (runtime_option.device == Device::RKNPU);
|
||||
bool use_timvx = (runtime_option.device == Device::TIMVX);
|
||||
bool use_ascend = (runtime_option.device == Device::ASCEND);
|
||||
bool use_xpu = (runtime_option.device == Device::XPU);
|
||||
|
||||
if (use_gpu) {
|
||||
@@ -68,6 +69,11 @@ bool FastDeployModel::InitRuntimeWithSpecifiedBackend() {
|
||||
FDERROR << "The valid timvx backends of model " << ModelName() << " are " << Str(valid_timvx_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
|
||||
return false;
|
||||
}
|
||||
} else if (use_ascend) {
|
||||
if (!IsSupported(valid_ascend_backends, runtime_option.backend)) {
|
||||
FDERROR << "The valid ascend backends of model " << ModelName() << " are " << Str(valid_ascend_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
|
||||
return false;
|
||||
}
|
||||
} else if (use_xpu) {
|
||||
if (!IsSupported(valid_xpu_backends, runtime_option.backend)) {
|
||||
FDERROR << "The valid xpu backends of model " << ModelName() << " are " << Str(valid_xpu_backends) << ", " << runtime_option.backend << " is not supported." << std::endl;
|
||||
@@ -108,6 +114,8 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
|
||||
return CreateRKNPUBackend();
|
||||
} else if (runtime_option.device == Device::TIMVX) {
|
||||
return CreateTimVXBackend();
|
||||
} else if (runtime_option.device == Device::ASCEND) {
|
||||
return CreateASCENDBackend();
|
||||
} else if (runtime_option.device == Device::XPU) {
|
||||
return CreateXPUBackend();
|
||||
} else if (runtime_option.device == Device::IPU) {
|
||||
@@ -119,7 +127,7 @@ bool FastDeployModel::InitRuntimeWithSpecifiedDevice() {
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/XPU now." << std::endl;
|
||||
FDERROR << "Only support CPU/GPU/IPU/RKNPU/TIMVX/XPU/ASCEND now." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -256,6 +264,31 @@ bool FastDeployModel::CreateXPUBackend() {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool FastDeployModel::CreateASCENDBackend() {
|
||||
if (valid_ascend_backends.size() == 0) {
|
||||
FDERROR << "There's no valid ascend backends for model: " << ModelName()
|
||||
<< std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < valid_ascend_backends.size(); ++i) {
|
||||
if (!IsBackendAvailable(valid_ascend_backends[i])) {
|
||||
continue;
|
||||
}
|
||||
runtime_option.backend = valid_ascend_backends[i];
|
||||
runtime_ = std::unique_ptr<Runtime>(new Runtime());
|
||||
if (!runtime_->Init(runtime_option)) {
|
||||
return false;
|
||||
}
|
||||
runtime_initialized_ = true;
|
||||
return true;
|
||||
}
|
||||
FDERROR << "Found no valid backend for model: " << ModelName() << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool FastDeployModel::CreateIpuBackend() {
|
||||
if (valid_ipu_backends.size() == 0) {
|
||||
FDERROR << "There's no valid ipu backends for model: " << ModelName()
|
||||
|
@@ -45,6 +45,9 @@ class FASTDEPLOY_DECL FastDeployModel {
|
||||
/** Model's valid timvx backends. This member defined all the timvx backends have successfully tested for the model
|
||||
*/
|
||||
std::vector<Backend> valid_timvx_backends = {};
|
||||
/** Model's valid ascend backends. This member defined all the cann backends have successfully tested for the model
|
||||
*/
|
||||
std::vector<Backend> valid_ascend_backends = {};
|
||||
/** Model's valid KunlunXin xpu backends. This member defined all the KunlunXin xpu backends have successfully tested for the model
|
||||
*/
|
||||
std::vector<Backend> valid_xpu_backends = {};
|
||||
@@ -147,6 +150,7 @@ class FASTDEPLOY_DECL FastDeployModel {
|
||||
bool CreateRKNPUBackend();
|
||||
bool CreateTimVXBackend();
|
||||
bool CreateXPUBackend();
|
||||
bool CreateASCENDBackend();
|
||||
|
||||
std::shared_ptr<Runtime> runtime_;
|
||||
bool runtime_initialized_ = false;
|
||||
|
@@ -24,6 +24,7 @@ void BindRuntime(pybind11::module& m) {
|
||||
.def("use_gpu", &RuntimeOption::UseGpu)
|
||||
.def("use_cpu", &RuntimeOption::UseCpu)
|
||||
.def("use_rknpu2", &RuntimeOption::UseRKNPU2)
|
||||
.def("use_ascend", &RuntimeOption::UseAscend)
|
||||
.def("use_xpu", &RuntimeOption::UseXpu)
|
||||
.def("set_external_stream", &RuntimeOption::SetExternalStream)
|
||||
.def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
|
||||
@@ -34,6 +35,13 @@ void BindRuntime(pybind11::module& m) {
|
||||
.def("use_trt_backend", &RuntimeOption::UseTrtBackend)
|
||||
.def("use_openvino_backend", &RuntimeOption::UseOpenVINOBackend)
|
||||
.def("use_lite_backend", &RuntimeOption::UseLiteBackend)
|
||||
.def("set_lite_device_names", &RuntimeOption::SetLiteDeviceNames)
|
||||
.def("set_lite_context_properties", &RuntimeOption::SetLiteContextProperties)
|
||||
.def("set_lite_model_cache_dir", &RuntimeOption::SetLiteModelCacheDir)
|
||||
.def("set_lite_dynamic_shape_info", &RuntimeOption::SetLiteDynamicShapeInfo)
|
||||
.def("set_lite_subgraph_partition_path", &RuntimeOption::SetLiteSubgraphPartitionPath)
|
||||
.def("set_lite_mixed_precision_quantization_config_path", &RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath)
|
||||
.def("set_lite_subgraph_partition_config_buffer", &RuntimeOption::SetLiteSubgraphPartitionConfigBuffer)
|
||||
.def("set_paddle_mkldnn", &RuntimeOption::SetPaddleMKLDNN)
|
||||
.def("set_openvino_device", &RuntimeOption::SetOpenVINODevice)
|
||||
.def("set_openvino_shape_info", &RuntimeOption::SetOpenVINOShapeInfo)
|
||||
|
@@ -283,6 +283,11 @@ void RuntimeOption::UseXpu(int xpu_id,
|
||||
device = Device::XPU;
|
||||
}
|
||||
|
||||
void RuntimeOption::UseAscend(){
|
||||
enable_ascend = true;
|
||||
device = Device::ASCEND;
|
||||
}
|
||||
|
||||
void RuntimeOption::SetExternalStream(void* external_stream) {
|
||||
external_stream_ = external_stream;
|
||||
}
|
||||
@@ -408,6 +413,36 @@ void RuntimeOption::SetLiteSubgraphPartitionPath(
|
||||
nnadapter_subgraph_partition_config_path;
|
||||
}
|
||||
|
||||
void RuntimeOption::SetLiteSubgraphPartitionConfigBuffer(
|
||||
const std::string& nnadapter_subgraph_partition_config_buffer){
|
||||
lite_nnadapter_subgraph_partition_config_buffer = nnadapter_subgraph_partition_config_buffer;
|
||||
}
|
||||
|
||||
void RuntimeOption::SetLiteDeviceNames(const std::vector<std::string>& nnadapter_device_names){
|
||||
lite_nnadapter_device_names = nnadapter_device_names;
|
||||
}
|
||||
|
||||
void RuntimeOption::SetLiteContextProperties(const std::string& nnadapter_context_properties){
|
||||
lite_nnadapter_context_properties = nnadapter_context_properties;
|
||||
}
|
||||
|
||||
void RuntimeOption::SetLiteModelCacheDir(const std::string& nnadapter_model_cache_dir){
|
||||
lite_nnadapter_model_cache_dir = nnadapter_model_cache_dir;
|
||||
}
|
||||
|
||||
|
||||
void RuntimeOption::SetLiteDynamicShapeInfo(
|
||||
const std::map<std::string, std::vector<std::vector<int64_t>>>&
|
||||
nnadapter_dynamic_shape_info){
|
||||
lite_nnadapter_dynamic_shape_info = nnadapter_dynamic_shape_info;
|
||||
}
|
||||
|
||||
void RuntimeOption::SetLiteMixedPrecisionQuantizationConfigPath(
|
||||
const std::string& nnadapter_mixed_precision_quantization_config_path){
|
||||
lite_nnadapter_mixed_precision_quantization_config_path = nnadapter_mixed_precision_quantization_config_path;
|
||||
}
|
||||
|
||||
|
||||
void RuntimeOption::SetTrtInputShape(const std::string& input_name,
|
||||
const std::vector<int32_t>& min_shape,
|
||||
const std::vector<int32_t>& opt_shape,
|
||||
@@ -576,7 +611,7 @@ bool Runtime::Init(const RuntimeOption& _option) {
|
||||
FDINFO << "Runtime initialized with Backend::OPENVINO in "
|
||||
<< Str(option.device) << "." << std::endl;
|
||||
} else if (option.backend == Backend::LITE) {
|
||||
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX || option.device == Device::XPU,
|
||||
FDASSERT(option.device == Device::CPU || option.device == Device::TIMVX || option.device == Device::XPU || option.device == Device::ASCEND,
|
||||
"Backend::LITE only supports Device::CPU/Device::TIMVX/Device::XPU.");
|
||||
CreateLiteBackend();
|
||||
FDINFO << "Runtime initialized with Backend::LITE in " << Str(option.device)
|
||||
@@ -838,9 +873,15 @@ void Runtime::CreateLiteBackend() {
|
||||
lite_option.enable_fp16 = option.lite_enable_fp16;
|
||||
lite_option.power_mode = static_cast<int>(option.lite_power_mode);
|
||||
lite_option.optimized_model_dir = option.lite_optimized_model_dir;
|
||||
lite_option.nnadapter_subgraph_partition_config_path =
|
||||
option.lite_nnadapter_subgraph_partition_config_path;
|
||||
lite_option.nnadapter_subgraph_partition_config_path = option.lite_nnadapter_subgraph_partition_config_path;
|
||||
lite_option.nnadapter_subgraph_partition_config_buffer = option.lite_nnadapter_subgraph_partition_config_buffer;
|
||||
lite_option.nnadapter_device_names = option.lite_nnadapter_device_names;
|
||||
lite_option.nnadapter_context_properties = option.lite_nnadapter_context_properties;
|
||||
lite_option.nnadapter_model_cache_dir = option.lite_nnadapter_model_cache_dir;
|
||||
lite_option.nnadapter_dynamic_shape_info = option.lite_nnadapter_dynamic_shape_info;
|
||||
lite_option.nnadapter_mixed_precision_quantization_config_path = option.lite_nnadapter_mixed_precision_quantization_config_path;
|
||||
lite_option.enable_timvx = option.enable_timvx;
|
||||
lite_option.enable_ascend = option.enable_ascend;
|
||||
lite_option.enable_xpu = option.enable_xpu;
|
||||
lite_option.device_id = option.device_id;
|
||||
lite_option.xpu_l3_workspace_size = option.xpu_l3_workspace_size;
|
||||
|
@@ -116,6 +116,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
|
||||
/// Use TimVX to inference
|
||||
void UseTimVX();
|
||||
|
||||
/// Use Huawei Ascend to inference
|
||||
void UseAscend();
|
||||
|
||||
///
|
||||
/// \brief Turn on XPU.
|
||||
///
|
||||
@@ -235,11 +238,48 @@ struct FASTDEPLOY_DECL RuntimeOption {
|
||||
void SetLiteOptimizedModelDir(const std::string& optimized_model_dir);
|
||||
|
||||
/**
|
||||
* @brief Set nnadapter subgraph partition path for Paddle Lite backend.
|
||||
* @brief Set subgraph partition path for Paddle Lite backend.
|
||||
*/
|
||||
void SetLiteSubgraphPartitionPath(
|
||||
const std::string& nnadapter_subgraph_partition_config_path);
|
||||
|
||||
/**
|
||||
* @brief Set subgraph partition path for Paddle Lite backend.
|
||||
*/
|
||||
void SetLiteSubgraphPartitionConfigBuffer(
|
||||
const std::string& nnadapter_subgraph_partition_config_buffer);
|
||||
|
||||
/**
|
||||
* @brief Set device name for Paddle Lite backend.
|
||||
*/
|
||||
void SetLiteDeviceNames(
|
||||
const std::vector<std::string>& nnadapter_device_names);
|
||||
|
||||
/**
|
||||
* @brief Set context properties for Paddle Lite backend.
|
||||
*/
|
||||
void SetLiteContextProperties(
|
||||
const std::string& nnadapter_context_properties);
|
||||
|
||||
/**
|
||||
* @brief Set model cache dir for Paddle Lite backend.
|
||||
*/
|
||||
void SetLiteModelCacheDir(
|
||||
const std::string& nnadapter_model_cache_dir);
|
||||
|
||||
/**
|
||||
* @brief Set dynamic shape info for Paddle Lite backend.
|
||||
*/
|
||||
void SetLiteDynamicShapeInfo(
|
||||
const std::map<std::string, std::vector<std::vector<int64_t>>>&
|
||||
nnadapter_dynamic_shape_info);
|
||||
|
||||
/**
|
||||
* @brief Set mixed precision quantization config path for Paddle Lite backend.
|
||||
*/
|
||||
void SetLiteMixedPrecisionQuantizationConfigPath(
|
||||
const std::string& nnadapter_mixed_precision_quantization_config_path);
|
||||
|
||||
/**
|
||||
* @brief enable half precision while use paddle lite backend
|
||||
*/
|
||||
@@ -398,7 +438,17 @@ struct FASTDEPLOY_DECL RuntimeOption {
|
||||
// optimized model dir for CxxConfig
|
||||
std::string lite_optimized_model_dir = "";
|
||||
std::string lite_nnadapter_subgraph_partition_config_path = "";
|
||||
// and other nnadapter settings for CxxConfig
|
||||
std::string lite_nnadapter_subgraph_partition_config_buffer = "";
|
||||
std::string lite_nnadapter_context_properties = "";
|
||||
std::string lite_nnadapter_model_cache_dir = "";
|
||||
std::string lite_nnadapter_mixed_precision_quantization_config_path = "";
|
||||
std::map<std::string, std::vector<std::vector<int64_t>>>
|
||||
lite_nnadapter_dynamic_shape_info = {{"", {{0}}}};
|
||||
std::vector<std::string> lite_nnadapter_device_names = {};
|
||||
|
||||
bool enable_timvx = false;
|
||||
bool enable_ascend = false;
|
||||
bool enable_xpu = false;
|
||||
|
||||
// ======Only for Trt Backend=======
|
||||
|
@@ -29,6 +29,7 @@ PaddleClasModel::PaddleClasModel(const std::string& model_file,
|
||||
Backend::LITE};
|
||||
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
|
||||
valid_timvx_backends = {Backend::LITE};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
valid_xpu_backends = {Backend::LITE};
|
||||
valid_ipu_backends = {Backend::PDINFER};
|
||||
} else if (model_format == ModelFormat::ONNX) {
|
||||
|
@@ -29,6 +29,7 @@ YOLOv5::YOLOv5(const std::string& model_file, const std::string& params_file,
|
||||
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
||||
valid_xpu_backends = {Backend::LITE};
|
||||
valid_timvx_backends = {Backend::LITE};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
}
|
||||
runtime_option = custom_option;
|
||||
runtime_option.model_format = model_format;
|
||||
|
@@ -72,6 +72,7 @@ YOLOv6::YOLOv6(const std::string& model_file, const std::string& params_file,
|
||||
} else {
|
||||
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
|
||||
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
}
|
||||
runtime_option = custom_option;
|
||||
runtime_option.model_format = model_format;
|
||||
|
@@ -27,6 +27,7 @@ YOLOv7::YOLOv7(const std::string& model_file, const std::string& params_file,
|
||||
} else {
|
||||
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::LITE};
|
||||
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
}
|
||||
runtime_option = custom_option;
|
||||
runtime_option.model_format = model_format;
|
||||
|
@@ -39,6 +39,7 @@ class FASTDEPLOY_DECL PicoDet : public PPDetBase {
|
||||
Backend::PDINFER, Backend::LITE};
|
||||
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
|
||||
valid_rknpu_backends = {Backend::RKNPU2};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
initialized = Initialize();
|
||||
}
|
||||
|
||||
@@ -65,6 +66,7 @@ class FASTDEPLOY_DECL PPYOLOE : public PPDetBase {
|
||||
Backend::PDINFER, Backend::LITE};
|
||||
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
|
||||
valid_timvx_backends = {Backend::LITE};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
initialized = Initialize();
|
||||
}
|
||||
|
||||
@@ -89,6 +91,7 @@ class FASTDEPLOY_DECL PPYOLO : public PPDetBase {
|
||||
model_format) {
|
||||
valid_cpu_backends = {Backend::PDINFER, Backend::LITE};
|
||||
valid_gpu_backends = {Backend::PDINFER};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
initialized = Initialize();
|
||||
}
|
||||
|
||||
@@ -106,6 +109,7 @@ class FASTDEPLOY_DECL YOLOv3 : public PPDetBase {
|
||||
valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER,
|
||||
Backend::LITE};
|
||||
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
initialized = Initialize();
|
||||
}
|
||||
|
||||
@@ -123,6 +127,7 @@ class FASTDEPLOY_DECL PaddleYOLOX : public PPDetBase {
|
||||
valid_cpu_backends = {Backend::OPENVINO, Backend::ORT, Backend::PDINFER,
|
||||
Backend::LITE};
|
||||
valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
initialized = Initialize();
|
||||
}
|
||||
|
||||
@@ -171,6 +176,7 @@ class FASTDEPLOY_DECL SSD : public PPDetBase {
|
||||
model_format) {
|
||||
valid_cpu_backends = {Backend::PDINFER, Backend::LITE};
|
||||
valid_gpu_backends = {Backend::PDINFER};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
initialized = Initialize();
|
||||
}
|
||||
|
||||
|
@@ -32,6 +32,7 @@ Classifier::Classifier(const std::string& model_file,
|
||||
} else {
|
||||
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
|
||||
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
}
|
||||
runtime_option = custom_option;
|
||||
runtime_option.model_format = model_format;
|
||||
|
@@ -32,6 +32,7 @@ DBDetector::DBDetector(const std::string& model_file,
|
||||
} else {
|
||||
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
|
||||
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
}
|
||||
|
||||
runtime_option = custom_option;
|
||||
|
@@ -172,6 +172,7 @@ void BindPPOCRModel(pybind11::module& m) {
|
||||
.def_readwrite("mean", &vision::ocr::RecognizerPreprocessor::mean_)
|
||||
.def_readwrite("scale", &vision::ocr::RecognizerPreprocessor::scale_)
|
||||
.def_readwrite("is_scale", &vision::ocr::RecognizerPreprocessor::is_scale_)
|
||||
.def_readwrite("static_shape", &vision::ocr::RecognizerPreprocessor::static_shape_)
|
||||
.def("run", [](vision::ocr::RecognizerPreprocessor& self, std::vector<pybind11::array>& im_list) {
|
||||
std::vector<vision::FDMat> images;
|
||||
for (size_t i = 0; i < im_list.size(); ++i) {
|
||||
|
@@ -59,7 +59,8 @@ class FASTDEPLOY_DECL PPOCRv2 : public FastDeployModel {
|
||||
* \return true if the prediction successed, otherwise false.
|
||||
*/
|
||||
virtual bool Predict(cv::Mat* img, fastdeploy::vision::OCRResult* result);
|
||||
virtual bool Predict(const cv::Mat& img, fastdeploy::vision::OCRResult* result);
|
||||
virtual bool Predict(const cv::Mat& img,
|
||||
fastdeploy::vision::OCRResult* result);
|
||||
/** \brief BatchPredict the input image and get OCR result.
|
||||
*
|
||||
* \param[in] images The list of input image data, comes from cv::imread(), is a 3-D array with layout HWC, BGR format.
|
||||
|
@@ -22,15 +22,16 @@ namespace vision {
|
||||
namespace ocr {
|
||||
|
||||
void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
|
||||
const std::vector<int>& rec_image_shape) {
|
||||
int img_c, img_h, img_w;
|
||||
img_c = rec_image_shape[0];
|
||||
const std::vector<int>& rec_image_shape, bool static_shape) {
|
||||
int img_h, img_w;
|
||||
img_h = rec_image_shape[1];
|
||||
img_w = rec_image_shape[2];
|
||||
|
||||
img_w = int(img_h * max_wh_ratio);
|
||||
if (!static_shape) {
|
||||
|
||||
img_w = int(img_h * max_wh_ratio);
|
||||
float ratio = float(mat->Width()) / float(mat->Height());
|
||||
|
||||
int resize_w;
|
||||
if (ceilf(img_h * ratio) > img_w) {
|
||||
resize_w = img_w;
|
||||
@@ -38,10 +39,35 @@ void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
|
||||
resize_w = int(ceilf(img_h * ratio));
|
||||
}
|
||||
Resize::Run(mat, resize_w, img_h);
|
||||
Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {127, 127, 127});
|
||||
|
||||
std::vector<float> value = {127, 127, 127};
|
||||
Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), value);
|
||||
} else {
|
||||
if (mat->Width() >= img_w) {
|
||||
Resize::Run(mat, img_w, img_h); // Reszie W to 320
|
||||
} else {
|
||||
Resize::Run(mat, mat->Width(), img_h);
|
||||
Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {127, 127, 127});
|
||||
// Pad to 320
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void OcrRecognizerResizeImageOnAscend(FDMat* mat,
|
||||
const std::vector<int>& rec_image_shape) {
|
||||
|
||||
int img_h, img_w;
|
||||
img_h = rec_image_shape[1];
|
||||
img_w = rec_image_shape[2];
|
||||
|
||||
if (mat->Width() >= img_w) {
|
||||
Resize::Run(mat, img_w, img_h); // Reszie W to 320
|
||||
} else {
|
||||
Resize::Run(mat, mat->Width(), img_h);
|
||||
Pad::Run(mat, 0, 0, 0, int(img_w - mat->Width()), {0,0,0});
|
||||
// Pad to 320
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTensor>* outputs) {
|
||||
return Run(images, outputs, 0, images->size(), {});
|
||||
@@ -75,7 +101,7 @@ bool RecognizerPreprocessor::Run(std::vector<FDMat>* images, std::vector<FDTenso
|
||||
real_index = indices[i];
|
||||
}
|
||||
FDMat* mat = &(images->at(real_index));
|
||||
OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_);
|
||||
OcrRecognizerResizeImage(mat, max_wh_ratio, rec_image_shape_, static_shape_);
|
||||
NormalizeAndPermute::Run(mat, mean_, scale_, is_scale_);
|
||||
}
|
||||
// Only have 1 output Tensor.
|
||||
|
@@ -39,6 +39,7 @@ class FASTDEPLOY_DECL RecognizerPreprocessor {
|
||||
std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
|
||||
std::vector<float> scale_ = {0.5f, 0.5f, 0.5f};
|
||||
bool is_scale_ = true;
|
||||
bool static_shape_ = false;
|
||||
};
|
||||
|
||||
} // namespace ocr
|
||||
|
@@ -34,6 +34,7 @@ Recognizer::Recognizer(const std::string& model_file,
|
||||
} else {
|
||||
valid_cpu_backends = {Backend::PDINFER, Backend::ORT, Backend::OPENVINO, Backend::LITE};
|
||||
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
}
|
||||
|
||||
runtime_option = custom_option;
|
||||
@@ -83,6 +84,7 @@ bool Recognizer::BatchPredict(const std::vector<cv::Mat>& images,
|
||||
FDERROR << "Failed to preprocess the input image." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
reused_input_tensors_[0].name = InputInfoOfRuntime(0).name;
|
||||
if (!Infer(reused_input_tensors_, &reused_output_tensors_)) {
|
||||
FDERROR << "Failed to inference by runtime." << std::endl;
|
||||
|
@@ -29,6 +29,7 @@ PaddleSegModel::PaddleSegModel(const std::string& model_file,
|
||||
valid_gpu_backends = {Backend::PDINFER, Backend::ORT, Backend::TRT};
|
||||
valid_rknpu_backends = {Backend::RKNPU2};
|
||||
valid_timvx_backends = {Backend::LITE};
|
||||
valid_ascend_backends = {Backend::LITE};
|
||||
runtime_option = custom_option;
|
||||
runtime_option.model_format = model_format;
|
||||
runtime_option.model_file = model_file;
|
||||
|
@@ -301,6 +301,11 @@ class RuntimeOption:
|
||||
rknpu2_core=rknpu2.CoreMask.RKNN_NPU_CORE_0):
|
||||
return self._option.use_rknpu2(rknpu2_name, rknpu2_core)
|
||||
|
||||
def use_ascend(self):
|
||||
"""Inference with Huawei Ascend NPU
|
||||
"""
|
||||
return self._option.use_ascend()
|
||||
|
||||
def set_cpu_thread_num(self, thread_num=-1):
|
||||
"""Set number of threads if inference with CPU
|
||||
|
||||
@@ -355,6 +360,46 @@ class RuntimeOption:
|
||||
"""
|
||||
return self.use_lite_backend()
|
||||
|
||||
def set_lite_device_names(self, device_names):
|
||||
"""Set nnadapter device name for Paddle Lite backend.
|
||||
"""
|
||||
return self._option.set_lite_device_names(device_names)
|
||||
|
||||
def set_lite_context_properties(self, context_properties):
|
||||
"""Set nnadapter context properties for Paddle Lite backend.
|
||||
"""
|
||||
return self._option.set_lite_context_properties(context_properties)
|
||||
|
||||
def set_lite_model_cache_dir(self, model_cache_dir):
|
||||
"""Set nnadapter model cache dir for Paddle Lite backend.
|
||||
"""
|
||||
return self._option.set_lite_model_cache_dir(model_cache_dir)
|
||||
|
||||
def set_lite_dynamic_shape_info(self, dynamic_shape_info):
|
||||
""" Set nnadapter dynamic shape info for Paddle Lite backend.
|
||||
"""
|
||||
return self._option.set_lite_dynamic_shape_info(dynamic_shape_info)
|
||||
|
||||
def set_lite_subgraph_partition_path(self, subgraph_partition_path):
|
||||
""" Set nnadapter subgraph partition path for Paddle Lite backend.
|
||||
"""
|
||||
return self._option.set_lite_subgraph_partition_path(
|
||||
subgraph_partition_path)
|
||||
|
||||
def set_lite_subgraph_partition_config_buffer(self,
|
||||
subgraph_partition_buffer):
|
||||
""" Set nnadapter subgraph partition buffer for Paddle Lite backend.
|
||||
"""
|
||||
return self._option.set_lite_subgraph_partition_config_buffer(
|
||||
subgraph_partition_buffer)
|
||||
|
||||
def set_lite_mixed_precision_quantization_config_path(
|
||||
self, mixed_precision_quantization_config_path):
|
||||
""" Set nnadapter mixed precision quantization config path for Paddle Lite backend..
|
||||
"""
|
||||
return self._option.set_lite_mixed_precision_quantization_config_path(
|
||||
mixed_precision_quantization_config_path)
|
||||
|
||||
def set_paddle_mkldnn(self, use_mkldnn=True):
|
||||
"""Enable/Disable MKLDNN while using Paddle Inference backend, mkldnn is enabled by default.
|
||||
"""
|
||||
|
@@ -508,6 +508,17 @@ class RecognizerPreprocessor:
|
||||
"""
|
||||
return self._preprocessor.run(input_ims)
|
||||
|
||||
@property
|
||||
def static_shape(self):
|
||||
return self._preprocessor.static_shape
|
||||
|
||||
@static_shape.setter
|
||||
def static_shape(self, value):
|
||||
assert isinstance(
|
||||
value,
|
||||
bool), "The value to set `static_shape` must be type of bool."
|
||||
self._preprocessor.static_shape = value
|
||||
|
||||
@property
|
||||
def is_scale(self):
|
||||
return self._preprocessor.is_scale
|
||||
@@ -626,6 +637,17 @@ class Recognizer(FastDeployModel):
|
||||
def postprocessor(self, value):
|
||||
self._model.postprocessor = value
|
||||
|
||||
@property
|
||||
def static_shape(self):
|
||||
return self._model.preprocessor.static_shape
|
||||
|
||||
@static_shape.setter
|
||||
def static_shape(self, value):
|
||||
assert isinstance(
|
||||
value,
|
||||
bool), "The value to set `static_shape` must be type of bool."
|
||||
self._model.preprocessor.static_shape = value
|
||||
|
||||
@property
|
||||
def is_scale(self):
|
||||
return self._model.preprocessor.is_scale
|
||||
|
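The `static_shape` switch exposed above pairs with the fixed-shape recognizer resize that this commit adds for Ascend in the C++ preprocessor. Below is a hedged Python sketch of how a user might flip it on; the already-constructed `recognizer` object and the decision to enable it for Ascend deployments are assumptions made for illustration.

```python
# Illustrative sketch: `recognizer` is assumed to be an already created
# fastdeploy.vision.ocr.Recognizer whose RuntimeOption has use_ascend() set.
# Enabling static_shape makes the preprocessor resize/pad every text crop to the
# fixed shape in rec_image_shape instead of using a dynamic width, matching the
# Ascend-oriented resize path added in this commit.
recognizer.static_shape = True
assert recognizer.static_shape is True
```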
@@ -56,6 +56,7 @@ if os.getenv("BUILD_ON_CPU", "OFF") == "ON":

setup_configs = dict()
setup_configs["ENABLE_RKNPU2_BACKEND"] = os.getenv("ENABLE_RKNPU2_BACKEND",
                                                   "OFF")
setup_configs["WITH_ASCEND"] = os.getenv("WITH_ASCEND", "OFF")
setup_configs["ENABLE_ORT_BACKEND"] = os.getenv("ENABLE_ORT_BACKEND", "OFF")
setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND",
                                                     "OFF")
13
scripts/ascend_init.sh
Normal file
@@ -0,0 +1,13 @@
# Set huawei ascend toolkit correctly.
HUAWEI_ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest"
HUAWEI_ASCEND_DRIVER_PATH="/usr/local/Ascend/driver"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HUAWEI_ASCEND_DRIVER_PATH/lib64/driver:$HUAWEI_ASCEND_DRIVER_PATH/lib64:$HUAWEI_ASCEND_DRIVER_PATH/lib64/stub:$HUAWEI_ASCEND_TOOLKIT_HOME/acllib/lib64:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/lib64:$HUAWEI_ASCEND_TOOLKIT_HOME/opp/op_proto/built-in
export PYTHONPATH=$PYTHONPATH:$HUAWEI_ASCEND_TOOLKIT_HOME/fwkacllib/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/acllib/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/toolkit/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/python/site-packages:$HUAWEI_ASCEND_TOOLKIT_HOME/pyACL/python/site-packages/acl
export PATH=$PATH:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/ccec_compiler/bin:${HUAWEI_ASCEND_TOOLKIT_HOME}/acllib/bin:$HUAWEI_ASCEND_TOOLKIT_HOME/atc/bin
export ASCEND_AICPU_PATH=$HUAWEI_ASCEND_TOOLKIT_HOME
export ASCEND_OPP_PATH=$HUAWEI_ASCEND_TOOLKIT_HOME/opp
export TOOLCHAIN_HOME=$HUAWEI_ASCEND_TOOLKIT_HOME/toolkit
export ASCEND_SLOG_PRINT_TO_STDOUT=0
export ASCEND_GLOBAL_LOG_LEVEL=3

echo "===== Finish Initializing Environment for Ascend Deployment ====="
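The script above only configures the current shell. As a quick sanity check before constructing a runtime, one could verify from Python that the exported variables are visible to the process; this is an optional, illustrative check, not part of the repository.

```python
# Optional sanity check that ascend_init.sh (or fastdeploy_init.sh, which sources
# it) was run in the shell that launched this Python process.
import os

for var in ("ASCEND_AICPU_PATH", "ASCEND_OPP_PATH", "TOOLCHAIN_HOME"):
    value = os.environ.get(var)
    print(f"{var} = {value!r}")
    if value is None:
        raise RuntimeError(
            f"{var} is not set; source ascend_init.sh before running the demo.")
```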
@@ -1,10 +1,11 @@
# source this file to import libraries

PLATFORM=`uname`
FASTDEPLOY_LIBRARY_PATH=${BASH_SOURCE%/*}
FASTDEPLOY_LIBRARY_PATH=${BASH_SOURCE}
if [ "$PLATFORM" == "Linux" ];then
  FASTDEPLOY_LIBRARY_PATH=`readlink -f ${FASTDEPLOY_LIBRARY_PATH}`
fi
FASTDEPLOY_LIBRARY_PATH=${FASTDEPLOY_LIBRARY_PATH%/*}

echo "=============== Information ======================"
echo "FastDeploy Library Path: $FASTDEPLOY_LIBRARY_PATH"
@@ -32,5 +33,10 @@ for LIB_DIR in ${LIBS_DIRECOTRIES[@]};do
  IMPORT_PATH=${LIB_DIR}":"$IMPORT_PATH
done

if [ -f "ascend_init.sh" ]
then
  source ascend_init.sh
fi

echo "[Execute] Will try to export all the library directories to environments, if not work, please try to export these path by your self."
export LD_LIBRARY_PATH=${IMPORT_PATH}:$LD_LIBRARY_PATH