mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
[Feature][MTP]Support MTP for rl-model (#4009)
* qk norm for speculate decode C16 * support mtp in v1_scheduler mode * support mtp rope_3d * support mtp features * add unit test && del some log --------- Co-authored-by: yuanxiaolan <yuanxiaolan01@baidu.com> Co-authored-by: xiaoxiaohehe001 <hiteezsf@163.com>
This commit is contained in:
@@ -100,6 +100,8 @@ class AppendAttentionBackend(AttentionBackend):
|
||||
self.rope_3d: bool = getattr(fd_config.model_config, "rope_3d", False) or getattr(
|
||||
fd_config.model_config, "use_3d_rope", False
|
||||
)
|
||||
if fd_config.speculative_config.model_type != "main":
|
||||
self.rope_3d = False
|
||||
self.causal: bool = getattr(fd_config.model_config, "causal", True)
|
||||
self.speculative_method: str = fd_config.speculative_config.method
|
||||
self.use_speculate: bool = self.speculative_method is not None
|
||||
@@ -356,7 +358,7 @@ class AppendAttentionBackend(AttentionBackend):
|
||||
getattr(layer, "cache_v_zp", None),
|
||||
layer.linear_shift,
|
||||
layer.linear_smooth,
|
||||
forward_meta.attn_mask_offsets,
|
||||
None if self.use_speculate else forward_meta.attn_mask_offsets,
|
||||
metadata.kv_signal_data_list[layer.layer_id],
|
||||
getattr(layer, "q_norm_weight", None),
|
||||
getattr(layer, "k_norm_weight", None),
|
||||
@@ -374,7 +376,7 @@ class AppendAttentionBackend(AttentionBackend):
|
||||
metadata.max_partition_size,
|
||||
metadata.encoder_max_partition_size,
|
||||
self.speculate_max_draft_token_num + 1,
|
||||
self.causal,
|
||||
self.causal or self.use_speculate,
|
||||
self.speculative_method is not None,
|
||||
)
|
||||
return res
|
||||
|
Reference in New Issue
Block a user