Unify server-side and model-side Config (Part1) (#3018)

* move cache config
* fix mtp
2025-10-12 20:11:20 +08:00 · 2025-07-28 10:51:52 +08:00
parent 8f426c1690
commit 6ccc10ad47
23 changed files with 243 additions and 289 deletions
--- a/fastdeploy/model_executor/layers/attention/iluvatar_attn_backend.py
+++ b/fastdeploy/model_executor/layers/attention/iluvatar_attn_backend.py
@@ -94,8 +94,8 @@ class IluvatarAttnBackend(AttentionBackend):
    ):
        super().__init__()
        self.attention_metadata = IluvatarAttentionMetadata()
-        self.attention_metadata.block_size = llm_config.parallel_config.block_size
-        assert llm_config.parallel_config.enc_dec_block_num == 0, "Iluvatar does not support yet"
+        self.attention_metadata.block_size = llm_config.cache_config.block_size
+        assert llm_config.cache_config.enc_dec_block_num == 0, "Iluvatar does not support yet"

        self.attention_metadata.max_context_len = llm_config.parallel_config.max_model_len
        self.attention_metadata.causal = getattr(llm_config.model_config, "causal", True)