	[RKNPU2] RKYOLO Support FP32 return value (#898)
* Make the RKNPU2 backend compatible with quantization for other models; formally remove the zp and scale quantization parameters from fd_tensor
* Update RKYOLO to return FP32 values
* Update rkyolov5 to support the FP32 format
* Update the YOLOv5 speed documentation

Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
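For context on why zp and scale can be retired from fd_tensor: quantized RKNN outputs previously had to be dequantized downstream using the zero point and scale attached to each tensor, whereas after this change the backend returns FP32 buffers directly. A minimal sketch of the affine dequantization that consumers no longer need to perform (illustrative code, not part of FastDeploy; the function name is hypothetical):

#include <cstddef>
#include <cstdint>
#include <vector>

// Affine dequantization: real_value = scale * (quantized_value - zero_point).
std::vector<float> DequantizeAffine(const int8_t* q, std::size_t n,
                                    int32_t zp, float scale) {
  std::vector<float> out(n);
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = scale * (static_cast<float>(q[i]) - static_cast<float>(zp));
  }
  return out;
}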
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
-
+#include "fastdeploy/utils/perf.h"
 namespace fastdeploy {
 RKNPU2Backend::~RKNPU2Backend() {
   // Release memory uniformly here
@@ -190,7 +190,6 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
       FDERROR << "rknpu2_backend only support input format is NHWC or UNDEFINED" << std::endl;
     }
 
-    DumpTensorAttr(input_attrs_[i]);
 
     // copy input_attrs_ to input tensor info
     std::string temp_name = input_attrs_[i].name;
@@ -199,16 +198,13 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
     for (int j = 0; j < input_attrs_[i].n_dims; j++) {
       temp_shape[j] = (int)input_attrs_[i].dims[j];
     }
-    FDDataType temp_dtype =
-        fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
-            input_attrs_[i].type);
+    FDDataType temp_dtype = fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(input_attrs_[i].type);
     TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
     inputs_desc_[i] = temp_input_info;
   }
 
   // Get detailed output parameters
-  output_attrs_ =
-      (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
+  output_attrs_ = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
   memset(output_attrs_, 0, io_num.n_output * sizeof(rknn_tensor_attr));
   outputs_desc_.resize(io_num.n_output);
@@ -225,19 +221,13 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
       return false;
     }
 
-    // If the output dimension is 3, the runtime will automatically change it to 4. 
+    // If the output dimension is 3, the runtime will automatically change it to 4.
     // Obviously, this is wrong, and manual correction is required here.
     int n_dims = output_attrs_[i].n_dims;
     if((n_dims == 4) && (output_attrs_[i].dims[3] == 1)){
       n_dims--;
-      FDWARNING << "The output["
-                << i
-                << "].shape[3] is 1, remove this dim."
-                << std::endl;
     }
-
-    DumpTensorAttr(output_attrs_[i]);
 
     // copy output_attrs_ to output tensor
     std::string temp_name = output_attrs_[i].name;
     std::vector<int> temp_shape{};
@@ -246,9 +236,8 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
       temp_shape[j] = (int)output_attrs_[i].dims[j];
     }
 
-    FDDataType temp_dtype =
-        fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
-            output_attrs_[i].type);
+    // The data type of output data is changed to FP32
+    FDDataType temp_dtype = FDDataType::FP32;
     TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
     outputs_desc_[i] = temp_input_info;
   }
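Advertising FDDataType::FP32 here is only correct because Infer() below also rebinds every output buffer as RKNN_TENSOR_FLOAT32 (see the later hunk); the two must stay in sync, since Nbytes() derived from this dtype sizes the final memcpy. A sketch of the invariant (illustrative, not FastDeploy code):

// If outputs_desc_ reported FP32 while the bound RKNN buffer stayed INT8 or
// FP16, the memcpy at the end of Infer() would copy the wrong byte count.
assert(outputs_desc_[i].dtype == FDDataType::FP32);
assert(output_attrs_[i].type == RKNN_TENSOR_FLOAT32);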
@@ -265,11 +254,12 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
 void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
   printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
          "n_elems=%d, size=%d, fmt=%s, type=%s, "
-         "qnt_type=%s, zp=%d, scale=%f\n",
+         "qnt_type=%s, zp=%d, scale=%f, pass_through=%d",
          attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
          attr.dims[2], attr.dims[3], attr.n_elems, attr.size,
          get_format_string(attr.fmt), get_type_string(attr.type),
-         get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale);
+         get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale,
+         attr.pass_through);
 }
 
 TensorInfo RKNPU2Backend::GetInputInfo(int index) {
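With the extra field, a dumped input attribute now prints like the following (hypothetical values for a 640x640 NHWC INT8 input; note that the new format string drops the trailing \n, so subsequent output continues on the same line):

index=0, name=images, n_dims=4, dims=[1, 640, 640, 3], n_elems=1228800, size=1228800, fmt=NHWC, type=INT8, qnt_type=AFFINE, zp=-128, scale=0.003922, pass_through=0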
@@ -320,7 +310,12 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
       input_attrs_[i].type = input_type;
       input_attrs_[i].size = inputs[0].Nbytes();
       input_attrs_[i].size_with_stride = inputs[0].Nbytes();
-      input_attrs_[i].pass_through = 0;
+      if(input_attrs_[i].type == RKNN_TENSOR_FLOAT16 ||
+          input_attrs_[i].type == RKNN_TENSOR_FLOAT32){
+        FDINFO << "The input model is not a quantitative model. "
+                  "Close the normalize operation." << std::endl;
+      }
+
       input_mems_[i] = rknn_create_mem(ctx, inputs[i].Nbytes());
       if (input_mems_[i] == nullptr) {
         FDERROR << "rknn_create_mem input_mems_ error." << std::endl;
@@ -345,11 +340,13 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
         FDERROR << "rknn_create_mem output_mems_ error." << std::endl;
         return false;
       }
-      if(output_attrs_[i].type == RKNN_TENSOR_FLOAT16){
-        output_attrs_[i].type = RKNN_TENSOR_FLOAT32;
-      }
+
+      // The data type of output data is changed to FP32
+      output_attrs_[i].type = RKNN_TENSOR_FLOAT32;
+
       // default output type is depend on model, this requires float32 to compute top5
       ret = rknn_set_io_mem(ctx, output_mems_[i], &output_attrs_[i]);
+
       // set output memory and attribute
       if (ret != RKNN_SUCC) {
         FDERROR << "output tensor memory rknn_set_io_mem fail! ret=" << ret
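Setting output_attrs_[i].type to RKNN_TENSOR_FLOAT32 before rknn_set_io_mem asks the RKNN runtime itself to dequantize into the zero-copy buffer. A sketch of that binding for a single output, assuming an initialized ctx and a queried rknn_tensor_attr attr (sizing by n_elems * sizeof(float) is an assumption, since the attr's size field still reflects the model's native type):

// Allocate a zero-copy buffer large enough to hold FP32 results.
rknn_tensor_mem* mem = rknn_create_mem(ctx, attr.n_elems * sizeof(float));
if (mem == nullptr) { /* allocation failed */ }

// Retype the attribute so the runtime converts the output to FP32 for us.
attr.type = RKNN_TENSOR_FLOAT32;
if (rknn_set_io_mem(ctx, mem, &attr) != RKNN_SUCC) { /* binding failed */ }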
@@ -360,7 +357,7 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
 
     this->infer_init = true;
   }
-  
+
   // Copy input data to input tensor memory
   for (uint32_t i = 0; i < io_num.n_input; i++) {
     uint32_t width = input_attrs_[i].dims[2];
@@ -376,7 +373,6 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
       return false;
     }
   }
-  
 
   // run rknn
   ret = rknn_run(ctx, nullptr);
@@ -395,8 +391,6 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
     }
     (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
                          outputs_desc_[i].name);
-    std::vector<float>  output_scale = {output_attrs_[i].scale};
-    (*outputs)[i].SetQuantizationInfo(output_attrs_[i].zp, output_scale);
     memcpy((*outputs)[i].MutableData(), (float*)output_mems_[i]->virt_addr,
            (*outputs)[i].Nbytes());
   }
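After this hunk, callers receive plain FP32 tensors with no quantization metadata to unpack. A minimal usage sketch (illustrative; assumes a ready RKNPU2Backend named backend and preprocessed inputs):

std::vector<fastdeploy::FDTensor> outputs;
if (backend.Infer(inputs, &outputs)) {
  // Every output is FP32 now, so the raw buffer can be read as float directly,
  // with no SetQuantizationInfo / zp / scale handling on the caller side.
  const float* data = static_cast<const float*>(outputs[0].Data());
}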
Author: Zheng_Bicheng