support c4 attn && fix cache

2025-10-05 16:48:03 +08:00 · 2025-07-23 23:51:28 +08:00
parent 832d25334a
commit 29c3292f02
16 changed files with 198 additions and 65 deletions
--- a/fastdeploy/model_executor/layers/attention/xpu_attn_backend.py
+++ b/fastdeploy/model_executor/layers/attention/xpu_attn_backend.py
@@ -146,6 +146,7 @@ class XPUAttentionBackend(AttentionBackend):
    def get_kv_cache_shape(
        self,
        max_num_blocks: int,
+        kv_cache_quant_type: str = None,
    ) -> Tuple[int, int, int, int]:
        """
        Calculate kv cache shape