Support c4 attention and fix cache

This commit is contained in:
lizhenyun01
2025-07-23 23:51:28 +08:00
parent 832d25334a
commit 29c3292f02
16 changed files with 198 additions and 65 deletions

View File

@@ -34,6 +34,8 @@ class MixQuantConfig(QuantConfigBase):
moe_quant_type: str,
kv_cache_quant_type: str = None,
image_moe_quant_type: str = None,
is_channel_wise: bool = False,
has_zero_point: bool = False,
) -> None:
super().__init__()
self.dense_quant_type = dense_quant_type
@@ -43,6 +45,8 @@ class MixQuantConfig(QuantConfigBase):
self.image_moe_quant_type = moe_quant_type
else:
self.image_moe_quant_type = image_moe_quant_type
self.is_channel_wise = is_channel_wise
self.has_zero_point = has_zero_point
self.quant_max_bound = 0
self.quant_min_bound = 0
self.quant_round_type = 0
@@ -57,6 +61,8 @@ class MixQuantConfig(QuantConfigBase):
config["moe_quant_type"],
config.get("kv_cache_quant_type", None),
config.get("image_moe_quant_type", None),
config.get("is_channel_wise", False),
config.get("has_zero_point", False),
)
def get_quant_method(self, layer) -> Optional[QuantMethodBase]:
@@ -67,7 +73,11 @@ class MixQuantConfig(QuantConfigBase):
return get_quantization_config(self.moe_quant_type).from_config({}).get_quant_method(layer)
elif isinstance(layer, Attention):
if self.kv_cache_quant_type is not None:
return get_quantization_config("kvcache").from_config(self.kv_cache_quant_type).get_quant_method(layer)
return (
get_quantization_config("kvcache")
.from_config(self.kv_cache_quant_type, self.is_channel_wise, self.has_zero_point)
.get_quant_method(layer)
)
else:
return None
else: