mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
Sync v2.0 version of code to github repo
This commit is contained in:
@@ -19,11 +19,18 @@ from typing import Dict, List, Type
|
||||
from .quant_base import QuantConfigBase
|
||||
|
||||
# Names of the quantization methods this module recognises.
# get_quantization_config() rejects any name that is not listed here, so a
# new method must be added to this list as well as to that function's
# method-to-config mapping.
QUANTIZATION_METHODS: List[str] = [
    "wint2",
    "wint4",
    "wint8",
    "weight_only",
    "block_wise",
    "block_wise_fp8",
    "w4afp8",
    "w8a8",
    "w4a8",
    "wfp8afp8",
    "mix_quant",
    "tensor_wise_fp8",
    "kvcache",
]
|
||||
|
||||
|
||||
@@ -34,20 +41,30 @@ def get_quantization_config(quantization: str) -> Type[QuantConfigBase]:
|
||||
if quantization not in QUANTIZATION_METHODS:
|
||||
raise ValueError(f"Invalid quantization method: {quantization}")
|
||||
|
||||
from .block_wise import BlockWiseConfig
|
||||
from .block_wise_fp8 import BlockWiseFP8Config
|
||||
from .kv_cache import KvCacheQuantConfig
|
||||
from .mix_quant import MixQuantConfig
|
||||
from .tensor_wise_fp8 import TensorWiseFP8Config
|
||||
from .w4a8 import W4A8Config
|
||||
from .w4afp8 import W4AFP8Config
|
||||
from .w8a8 import W8A8Config
|
||||
from .weight_only import WeightOnlyConfig
|
||||
from .weight_only import WeightOnlyConfig, WINT4Config, WINT8Config
|
||||
from .wfp8afp8 import WFP8AFP8Config
|
||||
from .kv_cache import KvCacheQuantConfig
|
||||
|
||||
from .wint2 import WINT2Config
|
||||
|
||||
method_to_config: Dict[str, Type[QuantConfigBase]] = {
|
||||
"wint2": WINT2Config,
|
||||
"wint4": WINT4Config,
|
||||
"wint8": WINT8Config,
|
||||
"weight_only": WeightOnlyConfig,
|
||||
"block_wise": BlockWiseConfig,
|
||||
"block_wise_fp8": BlockWiseFP8Config,
|
||||
"w4afp8": W4AFP8Config,
|
||||
"w8a8": W8A8Config,
|
||||
"w4a8": W4A8Config,
|
||||
"wfp8afp8": WFP8AFP8Config,
|
||||
"kvcache": KvCacheQuantConfig
|
||||
"tensor_wise_fp8": TensorWiseFP8Config,
|
||||
"kvcache": KvCacheQuantConfig,
|
||||
"mix_quant": MixQuantConfig,
|
||||
}
|
||||
|
||||
return method_to_config[quantization]
|
||||
|
Reference in New Issue
Block a user