[Backend] Add RKNPU2 backend support (#456)

* 10-29/14:05 * 新增cmake * 新增rknpu2 backend * 10-29/14:43 * Runtime fd_type新增RKNPU代码 * 10-29/15:02 * 新增ppseg RKNPU2推理代码 * 10-29/15:46 * 新增ppseg RKNPU2 cpp example代码 * 10-29/15:51 * 新增README文档 * 10-29/15:51 * 按照要求修改部分注释以及变量名称 * 10-29/15:51 * 修复重命名之后，cc文件中的部分代码还用旧函数名的bug * 10-29/22:32 * str(Device::NPU)将输出NPU而不是UNKOWN * 修改runtime文件中的注释格式 * 新增Building Summary ENABLE_RKNPU2_BACKEND输出 * pybind新增支持rknpu2 * 新增python编译选项 * 新增PPSeg Python代码 * 新增以及更新各种文档 * 10-30/14:11 * 尝试修复编译cuda时产生的错误 * 10-30/19:27 * 修改CpuName和CoreMask层级 * 修改ppseg rknn推理层级 * 图片将移动到网络进行下载 * 10-30/19:39 * 更新文档 * 10-30/19:39 * 更新文档 * 更新ppseg rknpu2 example中的函数命名方式 * 更新ppseg rknpu2 example为一个cc文件 * 修复disable_normalize_and_permute部分的逻辑错误 * 移除rknpu2初始化时的无用参数 * 10-30/19:39 * 尝试重置python代码 * 10-30/10:16 * rknpu2_config.h文件不再包含rknn_api头文件防止出现导入错误的问题 * 10-31/14:31 * 修改pybind，支持最新的rknpu2 backends * 再次支持ppseg python推理 * 移动cpuname 和 coremask的层级 * 10-31/15:35 * 尝试修复rknpu2导入错误 * 10-31/19:00 * 新增RKNPU2模型导出代码以及其对应的文档 * 更新大量文档错误 * 10-31/19:00 * 现在编译完fastdeploy仓库后无需重新设置RKNN2_TARGET_SOC * 10-31/19:26 * 修改部分错误文档 * 10-31/19:26 * 修复错误删除的部分 * 修复各种错误文档 * 修复FastDeploy.cmake在设置RKNN2_TARGET_SOC错误时，提示错误的信息 * 修复rknpu2_backend.cc中存在的中文注释 * 10-31/20:45 * 删除无用的注释 * 10-31/20:45 * 按照要求修改Device::NPU为Device::RKNPU，硬件将共用valid_hardware_backends * 删除无用注释以及debug代码 * 11-01/09:45 * 更新变量命名方式 * 11-01/10:16 * 修改部分文档，修改函数命名方式 Co-authored-by: Jason <jiangjiajun@baidu.com>
2025-10-17 22:21:48 +08:00 · 2022-11-01 11:14:05 +08:00
parent bb00e0757e
commit 4ffcfbe726
37 changed files with 1567 additions and 74 deletions
--- a/fastdeploy/runtime.h
+++ b/fastdeploy/runtime.h
@@ -20,12 +20,13 @@

 #pragma once

+#include <algorithm>
 #include <map>
 #include <vector>
-#include <algorithm>

 #include "fastdeploy/backends/backend.h"
 #include "fastdeploy/utils/perf.h"
+#include "backends/rknpu/rknpu2/rknpu2_config.h"

 /** \brief All C++ FastDeploy APIs are defined inside this namespace
 *
@@ -34,21 +35,23 @@ namespace fastdeploy {

 /*! Inference backend supported in FastDeploy */
 enum Backend {
-  UNKNOWN,  ///< Unknown inference backend
-  ORT,  ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
-  TRT,  ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
-  PDINFER,  ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
-  POROS,  ///< Poros, support TorchScript format model, CPU / Nvidia GPU
-  OPENVINO,  ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
-  LITE,  ///< Paddle Lite, support Paddle format model, ARM CPU only
+  UNKNOWN, ///< Unknown inference backend
+  ORT,     ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
+  TRT,     ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
+  PDINFER, ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
+  POROS,   ///< Poros, support TorchScript format model, CPU / Nvidia GPU
+  OPENVINO, ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
+  LITE,     ///< Paddle Lite, support Paddle format model, ARM CPU only
+  RKNPU2,   ///< RKNPU2, support RKNN format model, Rockchip NPU only
 };

 /*! Deep learning model format */
 enum ModelFormat {
-  AUTOREC,  ///< Auto recognize the model format by model file name
-  PADDLE,  ///< Model with paddlepaddle format
-  ONNX,  ///< Model with ONNX format
-  TORCHSCRIPT,  ///< Model with TorchScript format
+  AUTOREC,     ///< Auto recognize the model format by model file name
+  PADDLE,      ///< Model with paddlepaddle format
+  ONNX,        ///< Model with ONNX format
+  RKNN,        ///< Model with RKNN format
+  TORCHSCRIPT, ///< Model with TorchScript format
 };

 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
@@ -58,12 +61,12 @@ FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,

 /*! Paddle Lite power mode for mobile device. */
 enum LitePowerMode {
-  LITE_POWER_HIGH = 0,  ///< Use Lite Backend with high power mode
-  LITE_POWER_LOW = 1,  ///< Use Lite Backend with low power mode
-  LITE_POWER_FULL = 2,  ///< Use Lite Backend with full power mode
-  LITE_POWER_NO_BIND = 3,  ///< Use Lite Backend with no bind power mode
-  LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high mode
-  LITE_POWER_RAND_LOW = 5  ///< Use Lite Backend with rand low power mode
+  LITE_POWER_HIGH = 0,      ///< Use Lite Backend with high power mode
+  LITE_POWER_LOW = 1,       ///< Use Lite Backend with low power mode
+  LITE_POWER_FULL = 2,      ///< Use Lite Backend with full power mode
+  LITE_POWER_NO_BIND = 3,   ///< Use Lite Backend with no bind power mode
+  LITE_POWER_RAND_HIGH = 4, ///< Use Lite Backend with rand high mode
+  LITE_POWER_RAND_LOW = 5   ///< Use Lite Backend with rand low power mode
 };

 FASTDEPLOY_DECL std::string Str(const Backend& b);
@@ -86,7 +89,7 @@ ModelFormat GuessModelFormat(const std::string& model_file);
 /*! @brief Option object used when create a new Runtime object
 */
 struct FASTDEPLOY_DECL RuntimeOption {
-   /** \brief Set path of model file and parameter file
+  /** \brief Set path of model file and parameter file
   *
   * \param[in] model_path Path of model file, e.g ResNet50/model.pdmodel for Paddle format model / ResNet50/model.onnx for ONNX format model
   * \param[in] params_path Path of parameter file, this only used when the model format is Paddle, e.g Resnet50/model.pdiparams
@@ -102,6 +105,9 @@ struct FASTDEPLOY_DECL RuntimeOption {
  /// Use Nvidia GPU to inference
  void UseGpu(int gpu_id = 0);

+  void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name = fastdeploy::rknpu2::CpuName::RK3588,
+                 fastdeploy::rknpu2::CoreMask rknpu2_core = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
+
  void SetExternalStream(void* external_stream);

  /*
@@ -173,14 +179,14 @@ struct FASTDEPLOY_DECL RuntimeOption {
   */
  void DisableLiteFP16();

-   /**
-   * @brief enable int8 precision while use paddle lite backend
-   */
+  /**
+   * @brief Enable int8 precision when using the Paddle Lite backend
+   */
  void EnableLiteInt8();

  /**
-   * @brief disable int8 precision, change to full precision(float32)
-   */
+   * @brief Disable int8 precision and switch back to full precision (float32)
+   */
  void DisableLiteInt8();

  /**
@@ -325,9 +331,13 @@ struct FASTDEPLOY_DECL RuntimeOption {
  int unconst_ops_thres = -1;
  std::string poros_file = "";

-  std::string model_file = "";   // Path of model file
-  std::string params_file = "";  // Path of parameters file, can be empty
-  ModelFormat model_format = ModelFormat::AUTOREC;  // format of input model
+  // ======Only for RKNPU2 Backend=======
+  fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ = fastdeploy::rknpu2::CpuName::RK3588;
+  fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
+
+  std::string model_file = "";  // Path of model file
+  std::string params_file = ""; // Path of parameters file, can be empty
+  ModelFormat model_format = ModelFormat::AUTOREC; // format of input model

  // inside parameters, only for inside usage
  // remove multiclass_nms in Paddle2ONNX
@@ -388,6 +398,7 @@ struct FASTDEPLOY_DECL Runtime {
  void CreateTrtBackend();
  void CreateOpenVINOBackend();
  void CreateLiteBackend();
+  void CreateRKNPU2Backend();
  std::unique_ptr<BaseBackend> backend_;
 };
-}  // namespace fastdeploy
+} // namespace fastdeploy