Support QK norm (#3145)

This commit is contained in:
Yuan Xiaolan
2025-08-05 16:46:14 +08:00
committed by GitHub
parent 4a10e29804
commit 7ce00e597c
17 changed files with 791 additions and 201 deletions

View File

@@ -60,6 +60,9 @@ def append_attention(
linear_shift: Optional[paddle.Tensor] = None,
linear_smooth: Optional[paddle.Tensor] = None,
kv_signal_data: Optional[paddle.Tensor] = None,
q_norm_weight: Optional[paddle.Tensor] = None,
k_norm_weight: Optional[paddle.Tensor] = None,
rms_norm_eps: float = 1e-6,
compute_type: str = "bf16",
cache_quant_type: str = "none",
use_neox_rotary_style: bool = False,
@@ -114,6 +117,9 @@ def append_attention(
linear_shift,
linear_smooth,
kv_signal_data,
q_norm_weight,
k_norm_weight,
rms_norm_eps,
compute_type,
cache_quant_type,
use_neox_rotary_style,