FastDeploy/paddle2onnx/mapper/quantize_helper.h
Jason 6343b0db47 [Build] Support build with source code of Paddle2ONNX (#1559)
* Add notes for tensors

* Optimize some apis

* move some warnings

* Support build with Paddle2ONNX

* Add protobuf support

* Fix compile on mac

* add clean package script

* Add paddle2onnx code

* remove submodule

* Add onnx code

* remove softlink

* add onnx code

* fix error

* Add cmake file

* fix patchelf

* update paddle2onnx

* Delete .gitmodules

---------

Co-authored-by: PaddleCI <paddle_ci@example.com>
Co-authored-by: pangyoki <pangyoki@126.com>
Co-authored-by: jiangjiajun <jiangjiajun@baidu.lcom>
2023-03-17 10:03:22 +08:00


// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <onnx/onnx_pb.h>
#include <cmath>
#include <fstream>
#include <iomanip>
#include "paddle2onnx/mapper/mapper.h"
#include "paddle2onnx/parser/parser.h"
namespace paddle2onnx {
struct QuantizeModelProcessor {
public:
std::vector<QuantizeInfo> quantize_info;
const PaddleParser* parser_;
OnnxHelper* helper_;
std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>* parameters_;
std::vector<std::shared_ptr<ONNX_NAMESPACE::ValueInfoProto>>* inputs_;
std::vector<std::shared_ptr<ONNX_NAMESPACE::ValueInfoProto>>* outputs_;
std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>* nodes_;
// All op types that support quantization
std::vector<std::string> supported_quantize_type_;
std::map<std::string, std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>>
name2node_dict_;
// Records the tensors that need both a quantize and a dequantize op added
std::vector<std::string> tensors_to_be_quantize;
// Records the tensors that only need a dequantize op added
std::vector<std::string> only_dequantize_tensors;
// Convert the quantized model to the format required by deploy_backend;
// the backend can be TensorRT, ONNXRuntime, or others
void ProcessQuantizeModel(
std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>* parameters,
std::vector<std::shared_ptr<ONNX_NAMESPACE::ValueInfoProto>>* inputs,
std::vector<std::shared_ptr<ONNX_NAMESPACE::ValueInfoProto>>* outputs,
std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>* nodes,
OnnxHelper* helper, const std::string& deploy_backend,
const PaddleParser& parser, std::string* calibration_cache = nullptr);
// Remove all Quantize and Dequantize ops
void RemoveAllQuantizeOps();
// Returns true if every tensor in tensor_names has quantize info and all of
// its downstream nodes can be quantized; otherwise returns false
bool CanBeQuantize(const std::vector<std::string>& tensor_names,
const std::vector<int64_t>& output_index = {-1});
// Record a tensor for quantization; when only_dequantize is true, only a
// dequantize op will be added for it
void AppendQuantizeTensor(const std::string& tensor,
const bool& only_dequantize = false);
// Add QDQ for ORT according to:
// https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
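// (In the QDQ format, each quantized tensor is wrapped in a
// QuantizeLinear -> DequantizeLinear pair carrying its scale and zero point,
// which the runtime can later fuse into INT8 kernels.)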
void AddQDQForORT();
// Determine whether the tensor is connected directly to a graph output
// through an Identity node
bool ConnectToOutput(const std::string& output_name);
// Generate a calibration cache file for TensorRT 8.x INT8 deployment
void GenerateCache(std::string* calibration_cache);
// Add QDQ for TRT according to:
// https://github.com/NVIDIA/TensorRT/tree/main/tools/pytorch-quantization/pytorch_quantization/nn/modules
void AddTrtQDQ();
// Add QDQ for RKNN
void AddQDQForRKNN();
// Add quantize-related ops to the model according to the given tensor names
void AddQDQInModel(const std::vector<std::string>& tensors_to_be_quantize);
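// Broadcast/propagate quantize info to related tensors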
void QuantizeInfoBroadcast();
// Merge Conv + Add
void MergeConvAdd();
// Merge Conv + BatchNorm
void MergeConvBN();
// Determine whether a tensor is a graph output
bool IsGraphOutput(const std::string& name);
// Processing the quantized model adds new nodes and breaks the topological
// order of the graph, so this function re-sorts the nodes
void SortNodes();
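// Look up the shape of a tensor by name; returns false if the shape is unknown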
bool GetTensorShape(const std::string& name, std::vector<int64_t>* shape);
// Return the value of a tensor by name
template <typename T>
bool GetTensorByName(const std::string& name, std::vector<T>* value);
// Perform tensor-wise quantization, returning the scale and zero point
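// (Presumably symmetric INT8, i.e. scale = max(|x|) / 127 and zero point 0.)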
void GetTensorWiseQuantizeInfo(const std::vector<float>& tensor,
std::vector<float>* scale,
std::vector<int64_t>* zero);
// Perform channel-wise quantization along quant_axis, returning per-channel
// scales and zero points
void GetChannelWiseQuantizeInfo(const std::vector<float>& tensor,
const std::vector<int64_t>& shape,
const int64_t& quant_axis,
std::vector<float>* scale,
std::vector<int64_t>* zero);
// Build name2node_dict_, mapping each input name to the nodes that consume it
void UpdateInputNameToNodes();
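// Remove the node with the given name from the graph (update_io controls
// whether the connected inputs/outputs are updated accordingly)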
void RemoveNodeByName(const std::string& name, const bool& update_io = true);
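// Replace old_name with new_name in the inputs of all nodes, except for the
// nodes listed in except_nodes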
void ReplaceInputOfAllNodes(
const std::string& old_name, const std::string& new_name,
const std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>&
except_nodes = {});
};
} // namespace paddle2onnx
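
A minimal sketch of how this helper might be driven from the export pipeline, assuming parser is a fully parsed PaddleParser, helper is the OnnxHelper that owns the graph being built, and the intermediate ONNX nodes have already been produced; the variable names and the "onnxruntime" backend string are illustrative assumptions rather than confirmed usage:

std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>> parameters, nodes;
std::vector<std::shared_ptr<ONNX_NAMESPACE::ValueInfoProto>> inputs, outputs;
// ... filled in by the export pipeline ...

paddle2onnx::QuantizeModelProcessor quantize_processor;
std::string calibration_cache;  // only written for the TensorRT backend
quantize_processor.ProcessQuantizeModel(&parameters, &inputs, &outputs, &nodes,
                                        &helper, "onnxruntime", parser,
                                        &calibration_cache);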