mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
Add Quantization Function. (#256)
* Add PaddleOCR Support * Add PaddleOCR Support * Add PaddleOCRv3 Support * Add PaddleOCRv3 Support * Update README.md * Update README.md * Update README.md * Update README.md * Add PaddleOCRv3 Support * Add PaddleOCRv3 Supports * Add PaddleOCRv3 Suport * Fix Rec diff * Remove useless functions * Remove useless comments * Add PaddleOCRv2 Support * Add PaddleOCRv3 & PaddleOCRv2 Support * remove useless parameters * Add utils of sorting det boxes * Fix code naming convention * Fix code naming convention * Fix code naming convention * Fix bug in the Classify process * Imporve OCR Readme * Fix diff in Cls model * Update Model Download Link in Readme * Fix diff in PPOCRv2 * Improve OCR readme * Imporve OCR readme * Improve OCR readme * Improve OCR readme * Imporve OCR readme * Improve OCR readme * Fix conflict * Add readme for OCRResult * Improve OCR readme * Add OCRResult readme * Improve OCR readme * Improve OCR readme * Add Model Quantization Demo * Fix Model Quantization Readme * Fix Model Quantization Readme * Add the function to do PTQ quantization * Improve quant tools readme * Improve quant tool readme * Improve quant tool readme * Add PaddleInference-GPU for OCR Rec model * Add QAT method to fastdeploy-quantization tool * Remove examples/slim for now * Move configs folder * Add Quantization Support for Classification Model * Imporve ways of importing preprocess * Upload YOLO Benchmark on readme * Upload YOLO Benchmark on readme * Upload YOLO Benchmark on readme * Improve Quantization configs and readme * Add support for multi-inputs model
This commit is contained in:
155
tools/quantization/fdquant/fdquant.py
Normal file
155
tools/quantization/fdquant/fdquant.py
Normal file
@@ -0,0 +1,155 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import numpy as np
|
||||
import time
|
||||
import argparse
|
||||
from tqdm import tqdm
|
||||
import paddle
|
||||
from paddleslim.common import load_config, load_onnx_model
|
||||
from paddleslim.auto_compression import AutoCompression
|
||||
from paddleslim.quant import quant_post_static
|
||||
from fdquant.dataset import *
|
||||
|
||||
|
||||
def argsparser():
    """Build the command-line parser for the quantization tool.

    Options defined:
        --config_path (required): path of the compression strategy config.
        --method (required): quantization method, ``PTQ`` or ``QAT``.
        --save_dir: directory to save the compressed model
            (default ``'output'``).
        --devices: device used to compress, one of ``cpu``, ``gpu``,
            ``xpu`` or ``npu`` (default ``'gpu'``).

    Returns:
        argparse.ArgumentParser: the configured parser. Parsing is left to
        the caller.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--config_path',
        type=str,
        default=None,
        help="path of compression strategy config.",
        required=True)
    parser.add_argument(
        '--method',
        type=str,
        default=None,
        # Only these two spellings are acted on by main(); rejecting anything
        # else at parse time avoids a run that silently compresses nothing.
        choices=['PTQ', 'QAT'],
        help="choose PTQ or QAT as quantization method",
        required=True)
    parser.add_argument(
        '--save_dir',
        type=str,
        default='output',
        help="directory to save compressed model.")
    parser.add_argument(
        '--devices',
        type=str,
        default='gpu',
        # Same set that main() checks, enforced here so the check still
        # happens when Python runs with assertions disabled (-O).
        choices=['cpu', 'gpu', 'xpu', 'npu'],
        help="which device used to compress.")

    return parser
|
||||
|
||||
|
||||
def reader_wrapper(reader, input_list=None):
    """Adapt a batch reader into a factory of feed-dict generators.

    Args:
        reader: iterable of batches. Each batch is itself iterable, and every
            element of a batch is indexed positionally (``data[i]``) to
            obtain the value for the i-th input name.
        input_list: ordered model input names. The ``None`` default is kept
            for signature compatibility only; a real sequence is required.

    Returns:
        A zero-argument callable. Each call returns a new generator that
        yields ``{input_name: value}`` dictionaries.

    Raises:
        TypeError: if ``input_list`` is None. Raised here, when the wrapper
            is built, instead of surfacing later as an opaque
            "'NoneType' object is not iterable" while the data is consumed.
    """
    if input_list is None:
        raise TypeError(
            "reader_wrapper() requires 'input_list' (the ordered model "
            "input names), got None")

    def gen():
        for data_list in reader:
            # One dict per batch; names are filled in by position.
            in_dict = {}
            for data in data_list:
                for i, input_name in enumerate(input_list):
                    in_dict[input_name] = data[i]
                # NOTE(review): yields once per element of data_list, reusing
                # the same dict object within a batch -- confirm this yield
                # placement against the upstream file.
                yield in_dict

    return gen
|
||||
|
||||
|
||||
def main():
    """Quantize a model with QAT or PTQ as selected on the command line.

    Steps:
        1. Parse the CLI flags, switch paddle to static mode and select the
           device.
        2. Load the config file and publish its ``Global`` section as the
           module-level ``global_config``.
        3. Build an ``ImageFolder`` dataset over ``Global.image_path``,
           wrap it in a ``DataLoader`` (batch size 1, shuffled) and adapt it
           with ``reader_wrapper``.
        4. ``QAT``: run ``AutoCompression(...).compress()``.
           ``PTQ``: run ``quant_post_static`` using the ``PTQ`` config
           section.
        5. Print the elapsed wall-clock time.

    Config keys read from ``Global``: ``input_list``, ``image_path``,
    ``preprocess``, ``model_dir``, ``model_filename``, ``params_filename``
    and, for PTQ only, ``format``. From ``PTQ``: ``calibration_method`` and
    the optional ``skip_tensor_list``.

    Raises:
        ValueError: if ``--method`` is neither ``'QAT'`` nor ``'PTQ'``.
        AssertionError: if the device is unsupported, a required config
            section is missing, or ``image_path`` does not exist.
    """
    time_s = time.time()

    paddle.enable_static()
    parser = argsparser()
    FLAGS = parser.parse_args()

    assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu']
    # Reject an unknown method before any data or model is loaded; without
    # this check neither branch below runs and the script still reports
    # "Finish Compression".
    if FLAGS.method not in ('QAT', 'PTQ'):
        raise ValueError(
            f"Unsupported quantization method {FLAGS.method!r}; "
            "expected 'PTQ' or 'QAT'.")
    paddle.set_device(FLAGS.devices)

    global global_config
    all_config = load_config(FLAGS.config_path)
    assert "Global" in all_config, f"Key 'Global' not found in config file. \n{all_config}"
    global_config = all_config["Global"]
    input_list = global_config['input_list']

    assert os.path.exists(global_config[
        'image_path']), "image_path does not exist!"
    paddle.vision.image.set_image_backend('cv2')
    # transform could be customized.
    # SECURITY: eval() executes whatever Python expression the config file
    # puts under 'preprocess'. Only run this tool with trusted config files.
    train_dataset = paddle.vision.datasets.ImageFolder(
        global_config['image_path'],
        transform=eval(global_config['preprocess']))
    train_loader = paddle.io.DataLoader(
        train_dataset,
        batch_size=1,
        shuffle=True,
        drop_last=True,
        num_workers=0)
    train_loader = reader_wrapper(train_loader, input_list=input_list)
    # No evaluation callback is supplied to AutoCompression.
    eval_func = None

    if FLAGS.method == 'QAT':
        # ACT compression
        ac = AutoCompression(
            model_dir=global_config['model_dir'],
            model_filename=global_config['model_filename'],
            params_filename=global_config['params_filename'],
            train_dataloader=train_loader,
            save_dir=FLAGS.save_dir,
            config=all_config,
            eval_callback=eval_func)
        ac.compress()

    elif FLAGS.method == 'PTQ':
        # PTQ compression

        # Read PTQ config
        assert "PTQ" in all_config, f"Key 'PTQ' not found in config file. \n{all_config}"
        ptq_config = all_config["PTQ"]

        # Initialize the executor
        # Only 'gpu' maps to a CUDA place; every other accepted device value
        # falls back to CPUPlace here.
        place = paddle.CUDAPlace(
            0) if FLAGS.devices == 'gpu' else paddle.CPUPlace()
        exe = paddle.static.Executor(place)

        # Read ONNX or PADDLE format model
        if global_config['format'] == 'onnx':
            load_onnx_model(global_config["model_dir"])
            # NOTE(review): str.rstrip('.onnx') strips any trailing run of
            # the characters '.', 'o', 'n', 'x', not the literal suffix
            # (e.g. 'yolox.onnx' -> 'yol'). Left unchanged because
            # load_onnx_model presumably derives its output directory the
            # same way, so the two paths must stay in sync -- confirm against
            # paddleslim before changing.
            inference_model_path = global_config["model_dir"].rstrip().rstrip(
                '.onnx') + '_infer'
        else:
            inference_model_path = global_config["model_dir"].rstrip('/')

        quant_post_static(
            executor=exe,
            model_dir=inference_model_path,
            quantize_model_path=FLAGS.save_dir,
            data_loader=train_loader,
            model_filename=global_config["model_filename"],
            params_filename=global_config["params_filename"],
            batch_size=32,
            batch_nums=10,
            algo=ptq_config['calibration_method'],
            hist_percent=0.999,
            is_full_quantize=False,
            bias_correction=False,
            onnx_format=True,
            skip_tensor_list=ptq_config['skip_tensor_list']
            if 'skip_tensor_list' in ptq_config else None)

    time_total = time.time() - time_s
    print("Finish Compression, total time used is : ", time_total, "seconds.")
|
||||
|
||||
|
||||
# Run the quantization workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
Reference in New Issue
Block a user