fix and refine vl (#2866)

* refine vl config

* delete attn_sep

* fix vl accuracy
This commit is contained in:
Yuanle Liu
2025-07-16 20:59:28 +08:00
committed by GitHub
parent aa76085d1f
commit 63d6e7ce06
11 changed files with 63 additions and 117 deletions

View File

@@ -423,11 +423,11 @@ class ErnieVlRotaryEmbedding3D:
def get_rope_3d(
rotary_dim: int,
base: 10000,
position_ids,
paritial_rotary_factor: 1,
max_position: 131072,
freq_allocation: 2,
base: float,
position_ids: paddle.Tensor,
partial_rotary_factor: float,
max_position: int,
freq_allocation: int,
) -> paddle.Tensor:
"""
Pre-calculate rotary position embedding for position_ids.
@@ -435,19 +435,19 @@ def get_rope_3d(
Args:
rotary_dim (int):
Dimension of rotary embeddings (head dimension)
base (float, optional):
base (float):
Base value used to compute the inverse frequencies.
Default: 10000.0.
position_ids (paddle.Tensor):
Tensor containing position indices of input tokens.
partial_rotary_factor (int, optional):
partial_rotary_factor (float):
Factor controlling partial rotary application.
Default: 1 (apply to all dimensions).
max_position: Maximum position index to precompute.
freq_allocation: Number of rotary dimensions allocated to temporal axis
"""
rotary_emb3d_layer = ErnieVlRotaryEmbedding3D(rotary_dim, base,
paritial_rotary_factor,
partial_rotary_factor,
max_position,
freq_allocation)
rotary_emb_3d = rotary_emb3d_layer(position_ids)