FastDeploy/tools/auto_compression/fd_auto_compress/fd_auto_compress.py
yunyaoXYY efa634ebf8 [Quantization] Change the usage of FastDeploy auto compression tool. (#576)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
import time

import numpy as np
import paddle
from paddleslim.auto_compression import AutoCompression
from paddleslim.common import load_config, load_onnx_model
from paddleslim.quant import quant_post_static
from tqdm import tqdm

from fd_auto_compress.dataset import *


def argsparser():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--config_path',
        type=str,
        default=None,
        help="path of the compression strategy config.",
        required=True)
    parser.add_argument(
        '--method',
        type=str,
        default=None,
        help="quantization method: 'PTQ' or 'QAT'.",
        required=True)
    parser.add_argument(
        '--save_dir',
        type=str,
        default='output',
        help="directory to save the compressed model.")
    parser.add_argument(
        '--devices',
        type=str,
        default='gpu',
        help="device to run compression on: 'cpu', 'gpu', 'xpu' or 'npu'.")
    return parser
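
# Example invocation (illustrative only; the config path is an assumption,
# while the flags themselves come from argsparser() above):
#
#   python fd_auto_compress.py --config_path=./configs/yolov5s_quant.yaml \
#       --method='PTQ' --save_dir='./quantized_model' --devices='gpu'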

def reader_wrapper(reader, input_list):
    # Adapt a DataLoader into a generator of {input_name: ndarray} feed
    # dicts, which is the format PaddleSlim consumes.
    if isinstance(input_list, list) and len(input_list) == 1:
        input_name = input_list[0]

        def gen():
            for data in reader():
                yield {input_name: np.array(data[0])}

        return gen
    if isinstance(input_list, list) and len(input_list) > 1:

        def gen():
            for data in reader():
                in_dict = {}
                for i, input_name in enumerate(input_list):
                    in_dict[input_name] = np.array(data[0][i])
                yield in_dict

        return gen
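
# Sketch of the config file consumed below. The key names are taken from
# this script; the concrete values are assumptions for illustration. For
# QAT, the remaining AutoCompression strategy sections live in the same file:
#
#   Global:
#     model_dir: ./yolov5s_infer          # or ./yolov5s.onnx with format: onnx
#     format: paddle                      # 'paddle' or 'onnx'
#     model_filename: model.pdmodel
#     params_filename: model.pdiparams
#     input_list: ['x2paddle_images']     # model input names
#     qat_image_path: ./train_images      # QAT training images
#     qat_preprocess: yolo_image_preprocess
#     qat_batch_size: 4
#     ptq_image_path: ./val_images        # PTQ calibration images
#     ptq_preprocess: yolo_image_preprocess
#   PTQ:
#     calibration_method: KL              # passed to quant_post_static as algo
#     skip_tensor_list: None              # optional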

def auto_compress(FLAGS):
    time_s = time.time()
    paddle.enable_static()
    assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu']
    paddle.set_device(FLAGS.devices)
    global global_config

    # QAT (quantization aware training) via AutoCompression
    if FLAGS.method == 'QAT':
        # Read the Global config and prepare the training dataset.
        all_config = load_config(FLAGS.config_path)
        assert "Global" in all_config, f"Key 'Global' not found in config file.\n{all_config}"
        global_config = all_config["Global"]
        input_list = global_config['input_list']
        assert os.path.exists(global_config[
            'qat_image_path']), "qat_image_path does not exist!"
        paddle.vision.image.set_image_backend('cv2')
        # The preprocess transform can be customized in the config file.
        train_dataset = paddle.vision.datasets.ImageFolder(
            global_config['qat_image_path'],
            transform=eval(global_config['qat_preprocess']))
        train_loader = paddle.io.DataLoader(
            train_dataset,
            batch_size=global_config['qat_batch_size'],
            shuffle=True,
            drop_last=True,
            num_workers=0)
        train_loader = reader_wrapper(train_loader, input_list=input_list)
        eval_func = None

        # ACT compression
        ac = AutoCompression(
            model_dir=global_config['model_dir'],
            model_filename=global_config['model_filename'],
            params_filename=global_config['params_filename'],
            train_dataloader=train_loader,
            save_dir=FLAGS.save_dir,
            config=all_config,
            eval_callback=eval_func)
        ac.compress()
    # PTQ (post training quantization) compression
    if FLAGS.method == 'PTQ':
        # Read the Global config and prepare the calibration dataset.
        all_config = load_config(FLAGS.config_path)
        assert "Global" in all_config, f"Key 'Global' not found in config file.\n{all_config}"
        global_config = all_config["Global"]
        input_list = global_config['input_list']
        assert os.path.exists(global_config[
            'ptq_image_path']), "ptq_image_path does not exist!"
        paddle.vision.image.set_image_backend('cv2')
        # The preprocess transform can be customized in the config file.
        val_dataset = paddle.vision.datasets.ImageFolder(
            global_config['ptq_image_path'],
            transform=eval(global_config['ptq_preprocess']))
        val_loader = paddle.io.DataLoader(
            val_dataset,
            batch_size=1,
            shuffle=True,
            drop_last=True,
            num_workers=0)
        val_loader = reader_wrapper(val_loader, input_list=input_list)

        # Read the PTQ config.
        assert "PTQ" in all_config, f"Key 'PTQ' not found in config file.\n{all_config}"
        ptq_config = all_config["PTQ"]

        # Initialize the executor.
        place = paddle.CUDAPlace(
            0) if FLAGS.devices == 'gpu' else paddle.CPUPlace()
        exe = paddle.static.Executor(place)

        # Load an ONNX or Paddle format model. Caveat: rstrip('.onnx')
        # strips trailing characters from the set {'.', 'o', 'n', 'x'}, not
        # the literal suffix, so model names whose stem ends in one of those
        # characters will be mangled here.
        if global_config['format'] == 'onnx':
            load_onnx_model(global_config["model_dir"])
            inference_model_path = global_config["model_dir"].rstrip().rstrip(
                '.onnx') + '_infer'
        else:
            inference_model_path = global_config["model_dir"].rstrip('/')

        quant_post_static(
            executor=exe,
            model_dir=inference_model_path,
            quantize_model_path=FLAGS.save_dir,
            data_loader=val_loader,
            model_filename=global_config["model_filename"],
            params_filename=global_config["params_filename"],
            batch_size=32,
            batch_nums=10,
            algo=ptq_config['calibration_method'],
            hist_percent=0.999,
            is_full_quantize=False,
            bias_correction=False,
            onnx_format=True,
            skip_tensor_list=ptq_config.get('skip_tensor_list', None))

    time_total = time.time() - time_s
    print("Finished compression; total time used: {} seconds.".format(
        time_total))