Files
FastDeploy/fastdeploy/model_executor/layers/quantization/__init__.py
2025-06-29 23:29:37 +00:00

71 lines
2.1 KiB
Python

# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
quantization module
"""
from typing import Dict, List, Type
from .quant_base import QuantConfigBase
QUANTIZATION_METHODS: List[str] = [
"wint2",
"wint4",
"wint8",
"weight_only",
"block_wise_fp8",
"w4afp8",
"w8a8",
"w4a8",
"wfp8afp8",
"mix_quant",
"tensor_wise_fp8",
"kvcache",
]
def get_quantization_config(quantization: str) -> Type[QuantConfigBase]:
"""
Get the quantization config class by the quantization name.
"""
if quantization not in QUANTIZATION_METHODS:
raise ValueError(f"Invalid quantization method: {quantization}")
from .block_wise_fp8 import BlockWiseFP8Config
from .kv_cache import KvCacheQuantConfig
from .mix_quant import MixQuantConfig
from .tensor_wise_fp8 import TensorWiseFP8Config
from .w4a8 import W4A8Config
from .w4afp8 import W4AFP8Config
from .w8a8 import W8A8Config
from .weight_only import WeightOnlyConfig, WINT4Config, WINT8Config
from .wfp8afp8 import WFP8AFP8Config
from .wint2 import WINT2Config
method_to_config: Dict[str, Type[QuantConfigBase]] = {
"wint2": WINT2Config,
"wint4": WINT4Config,
"wint8": WINT8Config,
"weight_only": WeightOnlyConfig,
"block_wise_fp8": BlockWiseFP8Config,
"w4afp8": W4AFP8Config,
"w8a8": W8A8Config,
"w4a8": W4A8Config,
"wfp8afp8": WFP8AFP8Config,
"tensor_wise_fp8": TensorWiseFP8Config,
"kvcache": KvCacheQuantConfig,
"mix_quant": MixQuantConfig,
}
return method_to_config[quantization]