FastDeploy/tools/auto_compression/fd_auto_compress/fd_auto_compress.py
yunyaoXYY efa634ebf8 [Quantization] Change the usage of FastDeploy auto compression tool. (#576)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
import time

import numpy as np
import paddle
from paddleslim.auto_compression import AutoCompression
from paddleslim.common import load_config, load_onnx_model
from paddleslim.quant import quant_post_static
from tqdm import tqdm

from fd_auto_compress.dataset import *


def argsparser():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--config_path',
        type=str,
        default=None,
        help="path of the compression strategy config.",
        required=True)
    parser.add_argument(
        '--method',
        type=str,
        default=None,
        help="quantization method: 'PTQ' or 'QAT'.",
        required=True)
    parser.add_argument(
        '--save_dir',
        type=str,
        default='output',
        help="directory to save the compressed model.")
    parser.add_argument(
        '--devices',
        type=str,
        default='gpu',
        help="device to run compression on: 'cpu', 'gpu', 'xpu' or 'npu'.")
    return parser
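
# Example invocation (illustrative only; the config path is an assumption,
# while the flags themselves come from argsparser() above):
#
#   python fd_auto_compress.py --config_path=./configs/yolov5s_quant.yaml \
#       --method='PTQ' --save_dir='./quantized_model' --devices='gpu'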

def reader_wrapper(reader, input_list):
    # Adapt a DataLoader into a generator of {input_name: ndarray} feed
    # dicts, which is the format PaddleSlim consumes.
    if isinstance(input_list, list) and len(input_list) == 1:
        input_name = input_list[0]

        def gen():
            for data in reader():
                yield {input_name: np.array(data[0])}

        return gen
    if isinstance(input_list, list) and len(input_list) > 1:

        def gen():
            for data in reader():
                in_dict = {}
                for i, input_name in enumerate(input_list):
                    in_dict[input_name] = np.array(data[0][i])
                yield in_dict

        return gen
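
# Sketch of the config file consumed below. The key names are taken from
# this script; the concrete values are assumptions for illustration. For
# QAT, the remaining AutoCompression strategy sections live in the same file:
#
#   Global:
#     model_dir: ./yolov5s_infer          # or ./yolov5s.onnx with format: onnx
#     format: paddle                      # 'paddle' or 'onnx'
#     model_filename: model.pdmodel
#     params_filename: model.pdiparams
#     input_list: ['x2paddle_images']     # model input names
#     qat_image_path: ./train_images      # QAT training images
#     qat_preprocess: yolo_image_preprocess
#     qat_batch_size: 4
#     ptq_image_path: ./val_images        # PTQ calibration images
#     ptq_preprocess: yolo_image_preprocess
#   PTQ:
#     calibration_method: KL              # passed to quant_post_static as algo
#     skip_tensor_list: None              # optional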

def auto_compress(FLAGS):
    time_s = time.time()
    paddle.enable_static()
    assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu']
    paddle.set_device(FLAGS.devices)
    global global_config

    # QAT (quantization aware training) via AutoCompression
    if FLAGS.method == 'QAT':
        # Read the Global config and prepare the training dataset.
        all_config = load_config(FLAGS.config_path)
        assert "Global" in all_config, f"Key 'Global' not found in config file.\n{all_config}"
        global_config = all_config["Global"]
        input_list = global_config['input_list']
        assert os.path.exists(global_config[
            'qat_image_path']), "qat_image_path does not exist!"
        paddle.vision.image.set_image_backend('cv2')
        # The preprocess transform can be customized in the config file.
        train_dataset = paddle.vision.datasets.ImageFolder(
            global_config['qat_image_path'],
            transform=eval(global_config['qat_preprocess']))
        train_loader = paddle.io.DataLoader(
            train_dataset,
            batch_size=global_config['qat_batch_size'],
            shuffle=True,
            drop_last=True,
            num_workers=0)
        train_loader = reader_wrapper(train_loader, input_list=input_list)
        eval_func = None

        # ACT compression
        ac = AutoCompression(
            model_dir=global_config['model_dir'],
            model_filename=global_config['model_filename'],
            params_filename=global_config['params_filename'],
            train_dataloader=train_loader,
            save_dir=FLAGS.save_dir,
            config=all_config,
            eval_callback=eval_func)
        ac.compress()
    # PTQ (post training quantization) compression
    if FLAGS.method == 'PTQ':
        # Read the Global config and prepare the calibration dataset.
        all_config = load_config(FLAGS.config_path)
        assert "Global" in all_config, f"Key 'Global' not found in config file.\n{all_config}"
        global_config = all_config["Global"]
        input_list = global_config['input_list']
        assert os.path.exists(global_config[
            'ptq_image_path']), "ptq_image_path does not exist!"
        paddle.vision.image.set_image_backend('cv2')
        # The preprocess transform can be customized in the config file.
        val_dataset = paddle.vision.datasets.ImageFolder(
            global_config['ptq_image_path'],
            transform=eval(global_config['ptq_preprocess']))
        val_loader = paddle.io.DataLoader(
            val_dataset,
            batch_size=1,
            shuffle=True,
            drop_last=True,
            num_workers=0)
        val_loader = reader_wrapper(val_loader, input_list=input_list)

        # Read the PTQ config.
        assert "PTQ" in all_config, f"Key 'PTQ' not found in config file.\n{all_config}"
        ptq_config = all_config["PTQ"]

        # Initialize the executor.
        place = paddle.CUDAPlace(
            0) if FLAGS.devices == 'gpu' else paddle.CPUPlace()
        exe = paddle.static.Executor(place)

        # Load an ONNX or Paddle format model. Caveat: rstrip('.onnx')
        # strips trailing characters from the set {'.', 'o', 'n', 'x'}, not
        # the literal suffix, so model names whose stem ends in one of those
        # characters will be mangled here.
        if global_config['format'] == 'onnx':
            load_onnx_model(global_config["model_dir"])
            inference_model_path = global_config["model_dir"].rstrip().rstrip(
                '.onnx') + '_infer'
        else:
            inference_model_path = global_config["model_dir"].rstrip('/')

        quant_post_static(
            executor=exe,
            model_dir=inference_model_path,
            quantize_model_path=FLAGS.save_dir,
            data_loader=val_loader,
            model_filename=global_config["model_filename"],
            params_filename=global_config["params_filename"],
            batch_size=32,
            batch_nums=10,
            algo=ptq_config['calibration_method'],
            hist_percent=0.999,
            is_full_quantize=False,
            bias_correction=False,
            onnx_format=True,
            skip_tensor_list=ptq_config.get('skip_tensor_list', None))

    time_total = time.time() - time_s
    print("Finished compression; total time used: {} seconds.".format(
        time_total))