mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
【Fix bug] w4afp8 的nblock固定为256,并且fa3的append attn 增加mask参数 (#3771)
* fix w4afp8 * 增加集中式配置 * codestyle * fix fa3 append attn
This commit is contained in:
@@ -359,6 +359,7 @@ class FlashAttentionBackend(AttentionBackend):
|
||||
getattr(layer, "cache_v_zp", None),
|
||||
layer.linear_shift,
|
||||
layer.linear_smooth,
|
||||
forward_meta.attn_mask_offsets,
|
||||
metadata.kv_signal_data_list[layer.layer_id],
|
||||
getattr(layer, "q_norm_weight", None),
|
||||
getattr(layer, "k_norm_weight", None),
|
||||
|
Reference in New Issue
Block a user