mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
support qk norm (#3145)
This commit is contained in:
@@ -60,6 +60,9 @@ def append_attention(
 linear_shift: Optional[paddle.Tensor] = None,
 linear_smooth: Optional[paddle.Tensor] = None,
 kv_signal_data: Optional[paddle.Tensor] = None,
+q_norm_weight: Optional[paddle.Tensor] = None,
+k_norm_weight: Optional[paddle.Tensor] = None,
+rms_norm_eps: float = 1e-6,
 compute_type: str = "bf16",
 cache_quant_type: str = "none",
 use_neox_rotary_style: bool = False,
@@ -114,6 +117,9 @@ def append_attention(
 linear_shift,
 linear_smooth,
 kv_signal_data,
+q_norm_weight,
+k_norm_weight,
+rms_norm_eps,
 compute_type,
 cache_quant_type,
 use_neox_rotary_style,
Reference in New Issue
Block a user