[Backend] Add RKNPU2 backend support (#456)

* 10-29/14:05
* Add cmake
* Add rknpu2 backend

* 10-29/14:43
* Add RKNPU code to Runtime fd_type

* 10-29/15:02
* Add ppseg RKNPU2 inference code

* 10-29/15:46
* Add ppseg RKNPU2 cpp example code (see the usage sketch below)
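
A minimal usage sketch of the new ppseg RKNPU2 cpp example. Illustrative only: the API names assumed here (RuntimeOption::UseRKNPU2, vision::segmentation::PaddleSegModel, DisableNormalizeAndPermute, ModelFormat::RKNN) may not exactly match the code added in this PR.

```cpp
// Illustrative sketch only; option/model names below are assumptions, not
// necessarily the exact interfaces added by this PR.
#include <iostream>
#include "fastdeploy/vision.h"

int main() {
  fastdeploy::RuntimeOption option;
  // Select the RKNPU2 backend; on RK3588 an NPU core mask can also be chosen.
  option.UseRKNPU2(fastdeploy::rknpu2::CpuName::RK3588,
                   fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);

  auto model = fastdeploy::vision::segmentation::PaddleSegModel(
      "ppseg.rknn", "", "deploy.yaml", option, fastdeploy::ModelFormat::RKNN);
  // Normalize/permute is usually folded into the RKNN model at export time,
  // so the host-side preprocessing step is disabled here.
  model.DisableNormalizeAndPermute();

  cv::Mat im = cv::imread("test.jpg");
  fastdeploy::vision::SegmentationResult res;
  if (!model.Predict(&im, &res)) {
    std::cerr << "Prediction failed." << std::endl;
    return -1;
  }
  std::cout << res.Str() << std::endl;
  return 0;
}
```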

* 10-29/15:51
* Add a README document

* 10-29/15:51
* Revise some comments and variable names as requested

* 10-29/15:51
* Fix a bug where, after the rename, some code in the cc files still used the old function names

* 10-29/22:32
* str(Device::NPU) now outputs NPU instead of UNKNOWN
* Fix the comment format in the runtime files
* Add ENABLE_RKNPU2_BACKEND to the Building Summary output
* Add rknpu2 support to pybind (see the binding sketch below)
* Add the Python build option
* Add the PPSeg Python code
* Add and update various documents
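
A minimal pybind11 sketch of what exposing the rknpu2 backend option to Python could look like. The option struct and field names mirror what BuildOption() copies in rknpu2_backend.cc, but the actual binding code in this PR may be organized differently.

```cpp
// Illustrative pybind11 sketch only; the real binding added by this PR may differ.
#include <pybind11/pybind11.h>

namespace py = pybind11;

// Stand-in for the C++ RKNPU2 backend option; cpu_name and core_mask are the
// fields the backend's BuildOption() saves.
struct RKNPU2BackendOption {
  int cpu_name = 0;   // e.g. RK3588
  int core_mask = 0;  // NPU core selection (only meaningful on RK3588)
};

PYBIND11_MODULE(rknpu2_example, m) {
  py::class_<RKNPU2BackendOption>(m, "RKNPU2BackendOption")
      .def(py::init<>())
      .def_readwrite("cpu_name", &RKNPU2BackendOption::cpu_name)
      .def_readwrite("core_mask", &RKNPU2BackendOption::core_mask);
}
```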

* 10-30/14:11
* Attempt to fix errors when building with CUDA

* 10-30/19:27
* Adjust the hierarchy level of CpuName and CoreMask
* Adjust the hierarchy level of the ppseg rknn inference code
* Images are now downloaded from the network

* 10-30/19:39
* Update documentation

* 10-30/19:39
* Update documentation
* Update the function naming convention in the ppseg rknpu2 example
* Merge the ppseg rknpu2 example into a single cc file
* Fix a logic error in the disable_normalize_and_permute handling
* Remove unused parameters from rknpu2 initialization

* 10-30/19:39
* Attempt to reset the Python code

* 10-30/10:16
* rknpu2_config.h no longer includes the rknn_api header, to avoid import errors (see the header-hygiene sketch below)
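
A sketch of the header-hygiene idea behind this change: the public rknpu2_config.h keeps only plain enums, and only the backend .cc translates them into rknn types, so downstream users never need rknn_api.h on their include path. The enum values shown are assumptions.

```cpp
// Illustrative sketch only; the real rknpu2_config.h may differ.
// --- rknpu2_config.h: no rknn_api.h include, plain types only ---
namespace fastdeploy {
namespace rknpu2 {
enum class CpuName { RK356X = 0, RK3588 = 1, UNDEFINED };
enum class CoreMask { RKNN_NPU_CORE_AUTO = 0, RKNN_NPU_CORE_0 = 1, RKNN_NPU_CORE_1 = 2 };
}  // namespace rknpu2
}  // namespace fastdeploy

// --- rknpu2_backend.cc: the only translation unit that includes rknn_api.h ---
// #include "rknn_api.h"
// static rknn_core_mask ToRknnCoreMask(fastdeploy::rknpu2::CoreMask mask) {
//   return static_cast<rknn_core_mask>(mask);
// }
```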

* 10-31/14:31
* Update pybind to support the latest rknpu2 backends
* Support ppseg Python inference again
* Adjust the hierarchy level of cpuname and coremask

* 10-31/15:35
* Attempt to fix the rknpu2 import error

* 10-31/19:00
* Add the RKNPU2 model export code and its documentation
* Fix a large number of documentation errors

* 10-31/19:00
* After building the fastdeploy repo, RKNN2_TARGET_SOC no longer needs to be set again

* 10-31/19:26
* Fix some incorrect documentation

* 10-31/19:26
* Restore parts that were deleted by mistake
* Fix various documentation errors
* Fix FastDeploy.cmake printing the wrong message when RKNN2_TARGET_SOC is set incorrectly
* Fix Chinese comments in rknpu2_backend.cc

* 10-31/20:45
* Remove useless comments

* 10-31/20:45
* Rename Device::NPU to Device::RKNPU as requested; hardware now shares valid_hardware_backends (see the sketch below)
* Remove useless comments and debug code
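
A small sketch of the renamed device enum and its string mapping, tying back to the earlier str(Device::NPU) fix. The surrounding enum values are assumptions; FastDeploy's actual Device enum contains more entries.

```cpp
// Illustrative sketch only; FastDeploy's real Device enum has more values.
#include <string>

enum class Device { CPU, GPU, RKNPU };

std::string Str(Device d) {
  switch (d) {
    case Device::CPU:
      return "Device::CPU";
    case Device::GPU:
      return "Device::GPU";
    case Device::RKNPU:
      return "Device::RKNPU";  // previously fell through to UNKNOWN
    default:
      return "Device::UNKNOWN";
  }
}
```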

* 11-01/09:45
* Update the variable naming convention

* 11-01/10:16
* Update some documents and the function naming convention

Co-authored-by: Jason <jiangjiajun@baidu.com>
Author: Zheng_Bicheng
Date: 2022-11-01 11:14:05 +08:00
Commit: 4ffcfbe726 (parent bb00e0757e)
37 changed files with 1567 additions and 74 deletions

fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc

@@ -0,0 +1,425 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
namespace fastdeploy {
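// input_attrs / output_attrs are allocated with malloc in
// GetModelInputOutputInfos() and released when the backend is destroyed.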
RKNPU2Backend::~RKNPU2Backend() {
if (input_attrs != nullptr) {
free(input_attrs);
}
if (output_attrs != nullptr) {
free(output_attrs);
}
}
/***************************************************************
* @name GetSDKAndDeviceVersion
* @brief get RKNN sdk and device version
* @param None
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::GetSDKAndDeviceVersion() {
int ret;
// get sdk and device version
ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &sdk_ver, sizeof(sdk_ver));
if (ret != RKNN_SUCC) {
FDERROR << "rknn_query fail! ret=" << ret << std::endl;
return false;
}
FDINFO << "rknn_api/rknnrt version: " << sdk_ver.api_version
<< ", driver version: " << sdk_ver.drv_version << std::endl;
return true;
}
/***************************************************************
* @name BuildOption
* @brief save option
* @param RKNPU2BackendOption
* @note None
***************************************************************/
void RKNPU2Backend::BuildOption(const RKNPU2BackendOption& option) {
this->option_ = option;
// save cpu_name
this->option_.cpu_name = option.cpu_name;
// save core_mask
this->option_.core_mask = option.core_mask;
}
/***************************************************************
* @name InitFromRKNN
* @brief Initialize RKNN model
* @param model_file: Binary data for the RKNN model or the path of RKNN model.
* option: config
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::InitFromRKNN(const std::string& model_file,
const RKNPU2BackendOption& option) {
// LoadModel
if (!this->LoadModel((char*)model_file.data())) {
FDERROR << "load model failed" << std::endl;
return false;
}
// GetSDKAndDeviceVersion
if (!this->GetSDKAndDeviceVersion()) {
FDERROR << "get SDK and device version failed" << std::endl;
return false;
}
// BuildOption
this->BuildOption(option);
// SetCoreMask if RK3588
if (this->option_.cpu_name == rknpu2::CpuName::RK3588) {
if (!this->SetCoreMask(option_.core_mask)) {
FDERROR << "set core mask failed" << std::endl;
return false;
}
}
// GetModelInputOutputInfos
if (!this->GetModelInputOutputInfos()) {
FDERROR << "get model input output infos failed" << std::endl;
return false;
}
return true;
}
/***************************************************************
* @name SetCoreMask
* @brief set NPU core for model
* @param core_mask: The specification of NPU core setting.
* @return bool
* @note Only support RK3588
***************************************************************/
bool RKNPU2Backend::SetCoreMask(rknpu2::CoreMask& core_mask) const {
int ret = rknn_set_core_mask(ctx, static_cast<rknn_core_mask>(core_mask));
if (ret != RKNN_SUCC) {
FDERROR << "rknn_set_core_mask fail! ret=" << ret << std::endl;
return false;
}
return true;
}
/***************************************************************
* @name LoadModel
* @brief read rknn model
* @param model: Binary data for the RKNN model or the path of RKNN model.
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::LoadModel(void* model) {
int ret = RKNN_SUCC;
ret = rknn_init(&ctx, model, 0, 0, nullptr);
if (ret != RKNN_SUCC) {
FDERROR << "rknn_init fail! ret=" << ret << std::endl;
return false;
}
return true;
}
/***************************************************************
* @name GetModelInputOutputInfos
* @brief Get the detailed input and output infos of Model
* @param None
* @return bool
* @note None
***************************************************************/
bool RKNPU2Backend::GetModelInputOutputInfos() {
int ret = RKNN_SUCC;
// Get the number of model inputs and outputs
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret != RKNN_SUCC) {
FDERROR << "rknn_query RKNN_QUERY_IN_OUT_NUM fail! ret=" << ret << std::endl;
return false;
}
// Get detailed input parameters
input_attrs = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_input);
memset(input_attrs, 0, io_num.n_input * sizeof(rknn_tensor_attr));
inputs_desc_.resize(io_num.n_input);
for (uint32_t i = 0; i < io_num.n_input; i++) {
input_attrs[i].index = i;
// query info
ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]),
sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC) {
FDERROR << "rknn_query RKNN_QUERY_INPUT_ATTR fail! ret=" << ret << std::endl;
return false;
}
std::string temp_name = input_attrs[i].name;
std::vector<int> temp_shape{};
temp_shape.resize(input_attrs[i].n_dims);
for (int j = 0; j < input_attrs[i].n_dims; j++) {
temp_shape[j] = (int)input_attrs[i].dims[j];
}
FDDataType temp_dtype =
fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
input_attrs[i].type);
TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
inputs_desc_[i] = temp_input_info;
}
// Get detailed output parameters
output_attrs =
(rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
memset(output_attrs, 0, io_num.n_output * sizeof(rknn_tensor_attr));
outputs_desc_.resize(io_num.n_output);
for (uint32_t i = 0; i < io_num.n_output; i++) {
output_attrs[i].index = i;
// query info
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]),
sizeof(rknn_tensor_attr));
if (ret != RKNN_SUCC) {
FDERROR << "rknn_query fail! ret = " << ret << std::endl;
return false;
}
std::string temp_name = output_attrs[i].name;
std::vector<int> temp_shape{};
temp_shape.resize(output_attrs[i].n_dims);
for (int j = 0; j < output_attrs[i].n_dims; j++) {
temp_shape[j] = (int)output_attrs[i].dims[j];
}
FDDataType temp_dtype =
fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
output_attrs[i].type);
TensorInfo temp_output_info = {temp_name, temp_shape, temp_dtype};
outputs_desc_[i] = temp_output_info;
}
return true;
}
/***************************************************************
* @name DumpTensorAttr
* @brief Print the detailed information of an rknn_tensor_attr
* @param rknn_tensor_attr
* @return None
* @note None
***************************************************************/
void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
"n_elems=%d, size=%d, fmt=%s, type=%s, "
"qnt_type=%s, zp=%d, scale=%f\n",
attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
attr.dims[2], attr.dims[3], attr.n_elems, attr.size,
get_format_string(attr.fmt), get_type_string(attr.type),
get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale);
}
TensorInfo RKNPU2Backend::GetInputInfo(int index) {
FDASSERT(index < NumInputs(),
"The index: %d should be less than the number of inputs: %d.", index,
NumInputs())
return inputs_desc_[index];
}
std::vector<TensorInfo> RKNPU2Backend::GetInputInfos() { return inputs_desc_; }
TensorInfo RKNPU2Backend::GetOutputInfo(int index) {
FDASSERT(index < NumOutputs(),
"The index: %d should be less than the number of outputs: %d.", index,
NumOutputs())
return outputs_desc_[index];
}
std::vector<TensorInfo> RKNPU2Backend::GetOutputInfos() {
return outputs_desc_;
}
bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
std::vector<FDTensor>* outputs) {
int ret = RKNN_SUCC;
// Check that the number of inputs matches the model
if (inputs.size() != inputs_desc_.size()) {
FDERROR << "[RKNPU2Backend] Size of the inputs(" << inputs.size()
<< ") should keep same with the inputs of this model("
<< inputs_desc_.size() << ")." << std::endl;
return false;
}
// Only a single input is supported for now
if (inputs.size() > 1) {
FDERROR << "[RKNPU2Backend] Only one input is supported."
<< std::endl;
return false;
}
// Check whether the input type matches the model's expected input type
rknn_tensor_type input_type =
fastdeploy::RKNPU2Backend::FDDataTypeToRknnTensorType(inputs[0].dtype);
if (input_type != input_attrs[0].type) {
FDWARNING << "The input tensor type != model's inputs type."
<< "The input_type need " << get_type_string(input_attrs[0].type)
<< ",but inputs[0].type is " << get_type_string(input_type)
<< std::endl;
}
rknn_tensor_format input_layout =
RKNN_TENSOR_NHWC; // RK3588 only supports NHWC
input_attrs[0].type = input_type;
input_attrs[0].fmt = input_layout;
input_attrs[0].size = inputs[0].Nbytes();
input_attrs[0].size_with_stride = inputs[0].Nbytes();
input_attrs[0].pass_through = 0;
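// Note: the calls below use the RKNPU2 zero-copy API: rknn_create_mem
// allocates runtime-managed buffers, rknn_set_io_mem binds them to the
// model's inputs/outputs, and rknn_run then reads and writes them directly.
// pass_through = 0 asks the runtime to convert the data according to the
// type/fmt set above.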
// create input tensor memory
rknn_tensor_mem* input_mems[1];
input_mems[0] = rknn_create_mem(ctx, inputs[0].Nbytes());
if (input_mems[0] == nullptr) {
FDERROR << "rknn_create_mem input_mems error." << std::endl;
return false;
}
// Copy input data to input tensor memory
uint32_t width = input_attrs[0].dims[2];
uint32_t stride = input_attrs[0].w_stride;
if (width == stride) {
if (inputs[0].Data() == nullptr) {
FDERROR << "inputs[0].Data is NULL." << std::endl;
return false;
}
memcpy(input_mems[0]->virt_addr, inputs[0].Data(), inputs[0].Nbytes());
} else {
FDERROR << "[RKNPU2Backend] only support width == stride." << std::endl;
return false;
}
// Create output tensor memory
std::vector<rknn_tensor_mem*> output_mems(io_num.n_output);
for (uint32_t i = 0; i < io_num.n_output; ++i) {
// Most post-processing does not support the fp16 format.
// The unified output here is float32
uint32_t output_size = output_attrs[i].n_elems * sizeof(float);
output_mems[i] = rknn_create_mem(ctx, output_size);
}
// Set input tensor memory
ret = rknn_set_io_mem(ctx, input_mems[0], &input_attrs[0]);
if (ret != RKNN_SUCC) {
FDERROR << "input tensor memory rknn_set_io_mem fail! ret=" << ret
<< std::endl;
return false;
}
// Set output tensor memory
for (uint32_t i = 0; i < io_num.n_output; ++i) {
// The default output type depends on the model; force float32 here so that
// common post-processing code can consume the results directly
output_attrs[i].type = RKNN_TENSOR_FLOAT32;
ret = rknn_set_io_mem(ctx, output_mems[i], &output_attrs[i]);
// set output memory and attribute
if (ret != RKNN_SUCC) {
FDERROR << "output tensor memory rknn_set_io_mem fail! ret=" << ret
<< std::endl;
return false;
}
}
// run rknn
ret = rknn_run(ctx, nullptr);
if (ret != RKNN_SUCC) {
FDERROR << "rknn run error! ret=" << ret << std::endl;
return false;
}
rknn_destroy_mem(ctx, input_mems[0]);
// get result
outputs->resize(outputs_desc_.size());
std::vector<int64_t> temp_shape(4);
for (size_t i = 0; i < outputs_desc_.size(); ++i) {
temp_shape.resize(outputs_desc_[i].shape.size());
for (int j = 0; j < outputs_desc_[i].shape.size(); ++j) {
temp_shape[j] = outputs_desc_[i].shape[j];
}
(*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
outputs_desc_[i].name);
memcpy((*outputs)[i].MutableData(), (float*)output_mems[i]->virt_addr, (*outputs)[i].Nbytes());
rknn_destroy_mem(ctx, output_mems[i]);
}
return true;
}
/***************************************************************
* @name RknnTensorTypeToFDDataType
* @brief Change RknnTensorType To FDDataType
* @param rknn_tensor_type
* @return FDDataType
* @note Most post-processing does not support the fp16 format.
* Therefore, if the input is FP16, the output will be FP32.
***************************************************************/
FDDataType RKNPU2Backend::RknnTensorTypeToFDDataType(rknn_tensor_type type) {
if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT16) {
return FDDataType::FP32;
}
if (type == rknn_tensor_type::RKNN_TENSOR_FLOAT32) {
return FDDataType::FP32;
}
if (type == rknn_tensor_type::RKNN_TENSOR_INT8) {
return FDDataType::INT8;
}
if (type == rknn_tensor_type::RKNN_TENSOR_INT16) {
return FDDataType::INT16;
}
if (type == rknn_tensor_type::RKNN_TENSOR_INT32) {
return FDDataType::INT32;
}
if (type == rknn_tensor_type::RKNN_TENSOR_UINT8) {
return FDDataType::UINT8;
}
if (type == rknn_tensor_type::RKNN_TENSOR_BOOL) {
return FDDataType::BOOL;
}
FDERROR << "FDDataType don't support this type" << std::endl;
return FDDataType::UNKNOWN1;
}
/***************************************************************
* @name FDDataTypeToRknnTensorType
* @brief Change FDDataType To RknnTensorType
* @param FDDataType
* @return rknn_tensor_type
* @note None
***************************************************************/
rknn_tensor_type
RKNPU2Backend::FDDataTypeToRknnTensorType(fastdeploy::FDDataType type) {
if (type == FDDataType::FP16) {
return rknn_tensor_type::RKNN_TENSOR_FLOAT16;
}
if (type == FDDataType::FP32) {
return rknn_tensor_type::RKNN_TENSOR_FLOAT32;
}
if (type == FDDataType::INT8) {
return rknn_tensor_type::RKNN_TENSOR_INT8;
}
if (type == FDDataType::INT16) {
return rknn_tensor_type::RKNN_TENSOR_INT16;
}
if (type == FDDataType::INT32) {
return rknn_tensor_type::RKNN_TENSOR_INT32;
}
if (type == FDDataType::UINT8) {
return rknn_tensor_type::RKNN_TENSOR_UINT8;
}
if (type == FDDataType::BOOL) {
return rknn_tensor_type::RKNN_TENSOR_BOOL;
}
FDERROR << "rknn_tensor_type don't support this type" << std::endl;
return RKNN_TENSOR_TYPE_MAX;
}
} // namespace fastdeploy