[Feature] refactor metax_gpu attention and moe and remove some useless code (#3688)

Co-authored-by: yongqiangma <xing.wo@163.com>
2025-10-05 00:33:03 +08:00 · 2025-09-12 14:40:25 +08:00
parent cab7a633fe
commit 805f29a06c
5 changed files with 389 additions and 289 deletions
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -894,7 +894,7 @@ class CacheConfig:
            self.kv_cache_ratio = 1.0
        else:
            self.kv_cache_ratio = 0.75
-        self.enc_dec_block_num = 0 if current_platform.is_iluvatar() else 2
+        self.enc_dec_block_num = 0 if current_platform.is_iluvatar() or current_platform.is_maca() else 2
        self.prealloc_dec_block_slot_num_threshold = 12
        self.cache_dtype = "bfloat16"
        self.model_cfg = None