[Iluvatar GPU] Optimize attention performance and fix MoE checkpoint loading error (#3651)

This commit is contained in:
yzwu
2025-09-22 21:13:59 +08:00
committed by GitHub
parent 5532e8a323
commit 504461b6b5
17 changed files with 1344 additions and 363 deletions

View File

@@ -1186,9 +1186,7 @@ class CacheConfig:
self.kv_cache_ratio = 1.0
else:
self.kv_cache_ratio = 0.75
self.enc_dec_block_num = (
0 if current_platform.is_iluvatar() or current_platform.is_maca() else envs.FD_ENC_DEC_BLOCK_NUM
)
self.enc_dec_block_num = 0 if current_platform.is_maca() else envs.FD_ENC_DEC_BLOCK_NUM
self.prealloc_dec_block_slot_num_threshold = 12
self.cache_dtype = "bfloat16"
self.model_cfg = None