// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle2onnx/mapper/quantize/dequantize_linear.h" namespace paddle2onnx { REGISTER_MAPPER(dequantize_linear, DequantizeLinearMapper) int32_t DequantizeLinearMapper::GetMinOpset(bool verbose) { if (!IsConstantInput("Scale")) { Error() << "Input `Scale` requires to be a constant tensor." << std::endl; return -1; } std::vector scales; if (!TryGetInputValue("Scale", &scales)) { Error() << "Failed to read tensor value of `Scale`." << std::endl; return -1; } if (bit_length_ != 8) { Error() << "Only support bit_length = 8." << std::endl; return -1; } if (scales.size() > 1) { auto x_info = GetInput("X"); if (x_info[0].shape[quant_axis_] != scales.size()) { Error() << "Scale size must equal to the size of input quantize axis." << std::endl; return -1; } Logger(verbose, 13) << "While size of scales greater than 1, " << RequireOpset(13) << std::endl; return 13; } auto x_info = GetInput("X"); auto x_shape = x_info[0].shape; if (x_shape.size() == 2) { if (quant_axis_ != 1) { Error() << "When the rank of input is 2, the attribute quant_axis " "requires to be 1." << std::endl; return -1; } } else if (x_shape.size() == 4) { if (!(quant_axis_ == 1 || quant_axis_ == 0)) { Error() << "When the rank of input is 4, the attribute quant_axis " "requires to be 0 or 1." << std::endl; return -1; } } Logger(verbose, 10) << RequireOpset(10) << std::endl; return 10; } void DequantizeLinearMapper::ConvertInt8ToFp32( const std::vector &onnx_scales, std::vector *weight) { auto x_info = GetInput("X"); auto x_shape = x_info[0].shape; if (x_shape.size() == 2) { for (auto j = 0; j < x_shape[1]; ++j) { float scale_value = 0; if (onnx_scales.size() == 1) { scale_value = onnx_scales[0]; } else { scale_value = onnx_scales[j]; } for (auto i = 0; i < x_shape[0]; ++i) { auto offset = i * x_shape[1] + j; (*weight)[offset] *= scale_value; } } } else if (x_shape.size() == 4) { if (quant_axis_ == 0) { auto inner_offset = 1; for (auto i : x_shape) { inner_offset *= i; } inner_offset /= x_shape[0]; for (int i = 0; i < x_shape[0]; ++i) { float scale_value = 0; if (onnx_scales.size() == 1) { scale_value = onnx_scales[0]; } else { scale_value = onnx_scales[i]; } for (auto j = 0; j < inner_offset; ++j) { auto offset = i * inner_offset + j; (*weight)[offset] *= scale_value; } } } else { auto inner_offset = x_shape[2] * x_shape[3]; auto outter_offset = x_shape[1] * inner_offset; for (auto i = 0; i < x_shape[0]; ++i) { for (auto j = 0; j < x_shape[1]; ++j) { float scale_value = 0; if (onnx_scales.size() == 1) { scale_value = onnx_scales[0]; } else { scale_value = onnx_scales[j]; } for (auto k = 0; k < inner_offset; k++) { auto offset = i * outter_offset + j * inner_offset + k; (*weight)[offset] *= scale_value; } } } } } } void DequantizeLinearMapper::Opset10() { auto x_info = GetInput("X"); auto x_shape = x_info[0].shape; std::vector scales; Assert(TryGetInputValue("Scale", &scales), "Failed to read tensor value of `Scale`."); std::vector onnx_scales; onnx_scales.reserve(scales.size()); for (auto &i : scales) { onnx_scales.push_back(i / 127); } std::vector onnx_zeros(onnx_scales.size(), 0); std::string scale_node, zero_node; if (onnx_zeros.size() == 1) { scale_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::FLOAT, onnx_scales[0]); zero_node = helper_->Constant({}, ONNX_NAMESPACE::TensorProto::INT8, onnx_zeros[0]); } else { scale_node = helper_->Constant(ONNX_NAMESPACE::TensorProto::FLOAT, onnx_scales); zero_node = helper_->Constant(ONNX_NAMESPACE::TensorProto::INT8, onnx_zeros); } std::vector weight; TryGetInputValue("X", &weight); if (weight.empty()) { auto node = helper_->MakeNode("DequantizeLinear", {x_info[0].name, scale_node, zero_node}, {GetOutput("Y")[0].name}); if (helper_->GetOpsetVersion() >= 13) { AddAttribute(node, "axis", quant_axis_); } QuantizeInfo quantize_info(onnx_scales, onnx_zeros, scale_node, zero_node, quant_axis_); helper_->quantize_info[GetOutput("Y")[0].name] = quantize_info; return; } ConvertInt8ToFp32(onnx_scales, &weight); QuantizeInfo quantize_info(onnx_scales, onnx_zeros, scale_node, zero_node, quant_axis_); helper_->quantize_info[x_info[0].name] = quantize_info; Weight fp32_weight; fp32_weight.set(P2ODataType::FP32, x_shape, weight); helper_->updated_params[x_info[0].name] = fp32_weight; auto node = helper_->MakeNode("QuantizeLinear", {x_info[0].name, scale_node, zero_node}); if (helper_->GetOpsetVersion() >= 13) { AddAttribute(node, "axis", quant_axis_); } auto dq_node = helper_->MakeNode("DequantizeLinear", {node->output(0), scale_node, zero_node}, {GetOutput("Y")[0].name}); if (helper_->GetOpsetVersion() >= 13) { AddAttribute(dq_node, "axis", quant_axis_); } } } // namespace paddle2onnx