mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 09:07:10 +08:00

* Add notes for tensors
* Optimize some APIs
* Move some warnings
* Support build with Paddle2ONNX
* Add protobuf support
* Fix compile on Mac
* Add clean package script
* Add paddle2onnx code
* Remove submodule
* Add onnx code
* Remove softlink
* Add onnx code
* Fix error
* Add cmake file
* Fix patchelf
* Update paddle2onnx
* Delete .gitmodules

---------

Co-authored-by: PaddleCI <paddle_ci@example.com>
Co-authored-by: pangyoki <pangyoki@126.com>
Co-authored-by: jiangjiajun <jiangjiajun@baidu.lcom>
134 lines
5.4 KiB
C++
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <onnx/onnx_pb.h>

#include <cmath>
#include <fstream>
#include <iomanip>

#include "paddle2onnx/mapper/mapper.h"
#include "paddle2onnx/parser/parser.h"

namespace paddle2onnx {

struct QuantizeModelProcessor {
 public:
  std::vector<QuantizeInfo> quantize_info;
  const PaddleParser* parser_;
  OnnxHelper* helper_;

  std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>* parameters_;
  std::vector<std::shared_ptr<ONNX_NAMESPACE::ValueInfoProto>>* inputs_;
  std::vector<std::shared_ptr<ONNX_NAMESPACE::ValueInfoProto>>* outputs_;
  std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>* nodes_;

  // All types that support quantization
  std::vector<std::string> supported_quantize_type_;

  std::map<std::string, std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>>
      name2node_dict_;

  // Records the tensors that need both a quantize and a dequantize op
  std::vector<std::string> tensors_to_be_quantize;
  // Records the tensors that only need a dequantize op
  std::vector<std::string> only_dequantize_tensors;

  // Convert to different model formats based on the deploy backend; the
  // backend can be TensorRT, ONNXRuntime or others
  void ProcessQuantizeModel(
      std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>* parameters,
      std::vector<std::shared_ptr<ONNX_NAMESPACE::ValueInfoProto>>* inputs,
      std::vector<std::shared_ptr<ONNX_NAMESPACE::ValueInfoProto>>* outputs,
      std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>* nodes,
      OnnxHelper* helper, const std::string& deploy_backend,
      const PaddleParser& parser, std::string* calibration_cache = nullptr);
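
  // Illustrative call (a sketch, not taken from this header; the backend
  // string "onnxruntime" and the surrounding variables are assumptions):
  //
  //   QuantizeModelProcessor processor;
  //   std::string calibration_cache;
  //   processor.ProcessQuantizeModel(&parameters, &inputs, &outputs, &nodes,
  //                                  &helper, "onnxruntime", parser,
  //                                  &calibration_cache);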

  // Remove all Quantize and Dequantize ops
  void RemoveAllQuantizeOps();

  // Return true if all tensors in tensor_names have quantize info and all of
  // their downstream nodes can be quantized; otherwise return false
  bool CanBeQuantize(const std::vector<std::string>& tensor_names,
                     const std::vector<int64_t>& output_index = {-1});

  // If only_dequantize is true, the tensor only needs a dequantize op
  void AppendQuantizeTensor(const std::string& tensor,
                            const bool& only_dequantize = false);

  // Add QDQ for ORT according to:
  // https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.cc
  void AddQDQForORT();
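
  // (Here "QDQ" means ONNX QuantizeLinear/DequantizeLinear node pairs; ONNX
  // Runtime recognizes DQ -> Op -> Q patterns and fuses them into quantized
  // kernels where supported.)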

  // Determine whether the tensor is directly linked to the output by Identity
  bool ConnectToOutput(const std::string& output_name);

  // Generate cache file for TensorRT 8.X int8 deployment
  void GenerateCache(std::string* calibration_cache);
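
  // (Sketch of the expected cache layout, assuming TensorRT's usual
  // plain-text calibration format: a version header line followed by one
  // "tensor_name: <hex-encoded scale>" line per tensor.)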

  // Add QDQ for TRT according to:
  // https://github.com/NVIDIA/TensorRT/tree/main/tools/pytorch-quantization/pytorch_quantization/nn/modules
  void AddTrtQDQ();

  // Add QDQ for RKNN
  void AddQDQForRKNN();

  // Add quantize-related ops to the model according to the tensor names
  void AddQDQInModel(const std::vector<std::string>& tensors_to_be_quantize);

  void QuantizeInfoBroadcast();

  // Merge Conv + Add
  void MergeConvAdd();

  // Merge Conv + BN
  void MergeConvBN();
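
  // (Background, an assumption about the intent: folding BatchNorm into a
  // preceding Conv uses the standard rewrite
  //    W' = W * gamma / sqrt(var + eps)
  //    b' = (b - mean) * gamma / sqrt(var + eps) + beta
  // which removes the BN node so one set of quantize info covers the pair.)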

  // Determine whether a tensor is a graph output
  bool IsGraphOutput(const std::string& name);

  // Processing the quantize model adds new nodes, which breaks the
  // topological ordering, so this function sorts the nodes again
  void SortNodes();
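
  // (How SortNodes does this is not specified here; Kahn's algorithm is one
  // common choice: repeatedly emit nodes whose inputs are all graph inputs,
  // initializers, or outputs of already-emitted nodes.)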

  bool GetTensorShape(const std::string& name, std::vector<int64_t>* shape);

  // Return the value of a tensor by name
  template <typename T>
  bool GetTensorByName(const std::string& name, std::vector<T>* value);

  // Perform tensor-wise quantization, returning scale and zero point
  void GetTensorWiseQuantizeInfo(const std::vector<float>& tensor,
                                 std::vector<float>* scale,
                                 std::vector<int64_t>* zero);
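
  // Sketch of a symmetric int8 scheme this likely implements (an assumption,
  // not taken from this header): one scale over the whole tensor, zero = 0.
  //
  //   float absmax = 0.0f;
  //   for (float v : tensor) absmax = std::max(absmax, std::fabs(v));
  //   scale->assign(1, absmax / 127.0f);
  //   zero->assign(1, 0);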

  // Perform channel-wise quantization, returning scale and zero point
  void GetChannelWiseQuantizeInfo(const std::vector<float>& tensor,
                                  const std::vector<int64_t>& shape,
                                  const int64_t& quant_axis,
                                  std::vector<float>* scale,
                                  std::vector<int64_t>* zero);
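
  // (Channel-wise here means one scale/zero entry per slice along quant_axis;
  // for Conv weights the per-output-channel axis 0 is the typical choice.)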

  // Generate name2node_dict_ to map each input name to its related nodes
  void UpdateInputNameToNodes();

  void RemoveNodeByName(const std::string& name, const bool& update_io = true);

  void ReplaceInputOfAllNodes(
      const std::string& old_name, const std::string& new_name,
      const std::vector<std::shared_ptr<ONNX_NAMESPACE::NodeProto>>&
          except_nodes = {});
};

}  // namespace paddle2onnx