[RKNPU2] RKYOLO Support FP32 return value (#898)

* RKNPU2 Backend兼容其他模型的量化 fd_tensor正式移除zp和scale的量化参数 * 更新FP32返回值的RKYOLO * 更新rkyolov5支持fp32格式 * 更新rkyolov5支持fp32格式 * 更新YOLOv5速度文档 Co-authored-by: DefTruth <31974251+DefTruth@users.noreply.github.com>
2025-11-01 04:12:58 +08:00 · 2022-12-19 10:03:18 +08:00
parent 1798ad69ed
commit 95beb2bbf6
10 changed files with 76 additions and 118 deletions
--- a/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc
+++ b/fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 #include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
-
+#include "fastdeploy/utils/perf.h"
 namespace fastdeploy {
 RKNPU2Backend::~RKNPU2Backend() {
  // Release memory uniformly here
@@ -190,7 +190,6 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
      FDERROR << "rknpu2_backend only support input format is NHWC or UNDEFINED" << std::endl;
    }

-    DumpTensorAttr(input_attrs_[i]);

    // copy input_attrs_ to input tensor info
    std::string temp_name = input_attrs_[i].name;
@@ -199,16 +198,13 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
    for (int j = 0; j < input_attrs_[i].n_dims; j++) {
      temp_shape[j] = (int)input_attrs_[i].dims[j];
    }
-    FDDataType temp_dtype =
-        fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
-            input_attrs_[i].type);
+    FDDataType temp_dtype = fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(input_attrs_[i].type);
    TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
    inputs_desc_[i] = temp_input_info;
  }

  // Get detailed output parameters
-  output_attrs_ =
-      (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
+  output_attrs_ = (rknn_tensor_attr*)malloc(sizeof(rknn_tensor_attr) * io_num.n_output);
  memset(output_attrs_, 0, io_num.n_output * sizeof(rknn_tensor_attr));
  outputs_desc_.resize(io_num.n_output);

@@ -225,19 +221,13 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
      return false;
    }

-    // If the output dimension is 3, the runtime will automatically change it to 4. 
+    // If the output dimension is 3, the runtime will automatically change it to 4.
    // Obviously, this is wrong, and manual correction is required here.
    int n_dims = output_attrs_[i].n_dims;
    if((n_dims == 4) && (output_attrs_[i].dims[3] == 1)){
      n_dims--;
-      FDWARNING << "The output[" 
-                << i
-                << "].shape[3] is 1, remove this dim." 
-                << std::endl;
    }

-    DumpTensorAttr(output_attrs_[i]);
-
    // copy output_attrs_ to output tensor
    std::string temp_name = output_attrs_[i].name;
    std::vector<int> temp_shape{};
@@ -246,9 +236,8 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
      temp_shape[j] = (int)output_attrs_[i].dims[j];
    }

-    FDDataType temp_dtype =
-        fastdeploy::RKNPU2Backend::RknnTensorTypeToFDDataType(
-            output_attrs_[i].type);
+    // The data type of output data is changed to FP32
+    FDDataType temp_dtype = FDDataType::FP32;
    TensorInfo temp_input_info = {temp_name, temp_shape, temp_dtype};
    outputs_desc_[i] = temp_input_info;
  }
@@ -265,11 +254,12 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
 void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
  printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
         "n_elems=%d, size=%d, fmt=%s, type=%s, "
-         "qnt_type=%s, zp=%d, scale=%f\n",
+         "qnt_type=%s, zp=%d, scale=%f, pass_through=%d",
         attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
         attr.dims[2], attr.dims[3], attr.n_elems, attr.size,
         get_format_string(attr.fmt), get_type_string(attr.type),
-         get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale);
+         get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale,
+         attr.pass_through);
 }

 TensorInfo RKNPU2Backend::GetInputInfo(int index) {
@@ -320,7 +310,12 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
      input_attrs_[i].type = input_type;
      input_attrs_[i].size = inputs[0].Nbytes();
      input_attrs_[i].size_with_stride = inputs[0].Nbytes();
-      input_attrs_[i].pass_through = 0;
+      if(input_attrs_[i].type == RKNN_TENSOR_FLOAT16 ||
+          input_attrs_[i].type == RKNN_TENSOR_FLOAT32){
+        FDINFO << "The input model is not a quantitative model. "
+                  "Close the normalize operation." << std::endl;
+      }
+
      input_mems_[i] = rknn_create_mem(ctx, inputs[i].Nbytes());
      if (input_mems_[i] == nullptr) {
        FDERROR << "rknn_create_mem input_mems_ error." << std::endl;
@@ -345,11 +340,13 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
        FDERROR << "rknn_create_mem output_mems_ error." << std::endl;
        return false;
      }
-      if(output_attrs_[i].type == RKNN_TENSOR_FLOAT16){
-        output_attrs_[i].type = RKNN_TENSOR_FLOAT32;
-      }
+
+      // The data type of output data is changed to FP32
+      output_attrs_[i].type = RKNN_TENSOR_FLOAT32;
+
      // default output type is depend on model, this requires float32 to compute top5
      ret = rknn_set_io_mem(ctx, output_mems_[i], &output_attrs_[i]);
+
      // set output memory and attribute
      if (ret != RKNN_SUCC) {
        FDERROR << "output tensor memory rknn_set_io_mem fail! ret=" << ret
@@ -360,7 +357,7 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,

    this->infer_init = true;
  }
-  
+
  // Copy input data to input tensor memory
  for (uint32_t i = 0; i < io_num.n_input; i++) {
    uint32_t width = input_attrs_[i].dims[2];
@@ -376,7 +373,6 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
      return false;
    }
  }
-  

  // run rknn
  ret = rknn_run(ctx, nullptr);
@@ -395,8 +391,6 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
    }
    (*outputs)[i].Resize(temp_shape, outputs_desc_[i].dtype,
                         outputs_desc_[i].name);
-    std::vector<float>  output_scale = {output_attrs_[i].scale};
-    (*outputs)[i].SetQuantizationInfo(output_attrs_[i].zp, output_scale);
    memcpy((*outputs)[i].MutableData(), (float*)output_mems_[i]->virt_addr,
           (*outputs)[i].Nbytes());
  }