support c4 attn && fix cache

This commit is contained in:
lizhenyun01
2025-07-23 23:51:28 +08:00
parent 832d25334a
commit 29c3292f02
16 changed files with 198 additions and 65 deletions

View File

@@ -146,6 +146,7 @@ class XPUAttentionBackend(AttentionBackend):
def get_kv_cache_shape(
self,
max_num_blocks: int,
kv_cache_quant_type: str = None,
) -> Tuple[int, int, int, int]:
"""
Calculate kv cache shape