fix and refine vl (#2866)

* refine vl config
* delete attn_sep
* fix vl accuracy
2025-10-04 16:22:57 +08:00 · 2025-07-16 20:59:28 +08:00
parent aa76085d1f
commit 63d6e7ce06
11 changed files with 63 additions and 117 deletions
--- a/fastdeploy/model_executor/layers/rotary_embedding.py
+++ b/fastdeploy/model_executor/layers/rotary_embedding.py
@@ -423,11 +423,11 @@ class ErnieVlRotaryEmbedding3D:

 def get_rope_3d(
    rotary_dim: int,
-    base: 10000,
-    position_ids,
-    paritial_rotary_factor: 1,
-    max_position: 131072,
-    freq_allocation: 2,
+    base: float,
+    position_ids: paddle.Tensor,
+    partial_rotary_factor: float,
+    max_position: int,
+    freq_allocation: int,
 ) -> paddle.Tensor:
    """
    Pre-calculate rotary position embedding for position_ids.
@@ -435,19 +435,19 @@ def get_rope_3d(
    Args:
        rotary_dim (int):
            Dimension of rotary embeddings (head dimension)
-        base (float, optional):
+        base (float):
            Base value used to compute the inverse frequencies.
            Default: 10000.0.
        position_ids (paddle.Tensor):
            Tensor containing position indices of input tokens.
-        partial_rotary_factor (int, optional):
+        partial_rotary_factor (float):
            Factor controlling partial rotary application.
            Default: 1 (apply to all dimensions).
        max_position: Maximum position index to precompute.
        freq_allocation: Number of rotary dimensions allocated to temporal axis
    """
    rotary_emb3d_layer = ErnieVlRotaryEmbedding3D(rotary_dim, base,
-                                                  paritial_rotary_factor,
+                                                  partial_rotary_factor,
                                                  max_position,
                                                  freq_allocation)
    rotary_emb_3d = rotary_emb3d_layer(position_ids)