[RKNPU2] Update quantitative model (#879)

* 对RKNPU2后端进行修改,当模型为非量化模型时,不在NPU执行normalize操作,当模型为量化模型时,在NPU上执行normalize操作

* 更新RKNPU2框架,输出数据的数据类型统一返回fp32类型

* 更新scrfd,拆分disable_normalize和disable_permute

* 更新scrfd代码,支持量化

* 更新scrfd python example代码

* 更新模型转换代码,支持量化模型

* 更新文档

* 按照要求修改

* 按照要求修改

* 修正模型转换文档

* 更新一下转换脚本
This commit is contained in:
Zheng_Bicheng
2022-12-19 13:58:43 +08:00
committed by GitHub
parent 383887dede
commit dc13eb7049
18 changed files with 92 additions and 148 deletions

View File

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/backends/rknpu/rknpu2/rknpu2_backend.h"
#include "fastdeploy/utils/perf.h"
namespace fastdeploy {
RKNPU2Backend::~RKNPU2Backend() {
// Release memory uniformly here
@@ -254,12 +254,11 @@ bool RKNPU2Backend::GetModelInputOutputInfos() {
void RKNPU2Backend::DumpTensorAttr(rknn_tensor_attr& attr) {
printf("index=%d, name=%s, n_dims=%d, dims=[%d, %d, %d, %d], "
"n_elems=%d, size=%d, fmt=%s, type=%s, "
"qnt_type=%s, zp=%d, scale=%f, pass_through=%d",
"qnt_type=%s, zp=%d, scale=%f\n",
attr.index, attr.name, attr.n_dims, attr.dims[0], attr.dims[1],
attr.dims[2], attr.dims[3], attr.n_elems, attr.size,
get_format_string(attr.fmt), get_type_string(attr.type),
get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale,
attr.pass_through);
get_qnt_type_string(attr.qnt_type), attr.zp, attr.scale);
}
TensorInfo RKNPU2Backend::GetInputInfo(int index) {
@@ -310,12 +309,7 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
input_attrs_[i].type = input_type;
input_attrs_[i].size = inputs[0].Nbytes();
input_attrs_[i].size_with_stride = inputs[0].Nbytes();
if(input_attrs_[i].type == RKNN_TENSOR_FLOAT16 ||
input_attrs_[i].type == RKNN_TENSOR_FLOAT32){
FDINFO << "The input model is not a quantitative model. "
"Close the normalize operation." << std::endl;
}
input_attrs_[i].pass_through = 0;
input_mems_[i] = rknn_create_mem(ctx, inputs[i].Nbytes());
if (input_mems_[i] == nullptr) {
FDERROR << "rknn_create_mem input_mems_ error." << std::endl;
@@ -346,7 +340,6 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
// default output type is depend on model, this requires float32 to compute top5
ret = rknn_set_io_mem(ctx, output_mems_[i], &output_attrs_[i]);
// set output memory and attribute
if (ret != RKNN_SUCC) {
FDERROR << "output tensor memory rknn_set_io_mem fail! ret=" << ret
@@ -357,7 +350,7 @@ bool RKNPU2Backend::Infer(std::vector<FDTensor>& inputs,
this->infer_init = true;
}
// Copy input data to input tensor memory
for (uint32_t i = 0; i < io_num.n_input; i++) {
uint32_t width = input_attrs_[i].dims[2];