mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-12 20:11:20 +08:00
Unify server-side and model-side Config (Part1) (#3018)
* move cache config
* fix mtp
This commit is contained in:
@@ -94,8 +94,8 @@ class IluvatarAttnBackend(AttentionBackend):
|
||||
):
|
||||
super().__init__()
|
||||
self.attention_metadata = IluvatarAttentionMetadata()
|
||||
self.attention_metadata.block_size = llm_config.parallel_config.block_size
|
||||
assert llm_config.parallel_config.enc_dec_block_num == 0, "Iluvatar does not support yet"
|
||||
self.attention_metadata.block_size = llm_config.cache_config.block_size
|
||||
assert llm_config.cache_config.enc_dec_block_num == 0, "Iluvatar does not support yet"
|
||||
|
||||
self.attention_metadata.max_context_len = llm_config.parallel_config.max_model_len
|
||||
self.attention_metadata.causal = getattr(llm_config.model_config, "causal", True)
|
||||
|
Reference in New Issue
Block a user