Sync v2.0 version of code to github repo

2025-10-05 00:33:03 +08:00 · 2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions
--- a/fastdeploy/model_executor/layers/rotary_embedding.py
+++ b/fastdeploy/model_executor/layers/rotary_embedding.py
@@ -14,25 +14,25 @@
 # limitations under the License.
 """

-from typing import Any, Optional
+from typing import Optional

 import paddle

+from fastdeploy.config import ModelConfig
+from fastdeploy.platforms import current_platform
+
+from .utils import CpuGuard
+

 class ErnieRotaryEmbedding:

-    def __init__(self,
-                 rotary_dim,
-                 base,
-                 partial_rotary_factor,
-                 rope_scaling=None):
+    def __init__(self, rotary_dim, base, partial_rotary_factor):
        """
        Pre-calculate rotary position embedding for position_ids.
        """
        self.rotary_dim = rotary_dim
        self.base = base
        self.partial_rotary_factor = partial_rotary_factor
-        self.rope_scaling = rope_scaling

    def __call__(self, position_ids):
        bsz, max_seq_len = position_ids.shape[:2]
@@ -70,18 +70,13 @@ class ErnieRotaryEmbedding:

 class QwenRotaryEmbedding:

-    def __init__(self,
-                 rotary_dim,
-                 base,
-                 partial_rotary_factor,
-                 rope_scaling=None):
+    def __init__(self, rotary_dim, base, partial_rotary_factor):
        """
        Pre-calculate rotary position embedding for position_ids.
        """
        self.rotary_dim = rotary_dim
        self.base = base
        self.partial_rotary_factor = partial_rotary_factor
-        self.rope_scaling = rope_scaling

    def __call__(self, position_ids):
        bsz, max_seq_len = position_ids.shape[:2]
@@ -104,35 +99,72 @@ class QwenRotaryEmbedding:
        return rot_emb


+def get_rope_impl(
+    rotary_dim: int,
+    base: 10000.0,
+    position_ids,
+    model_config: Optional[ModelConfig] = None,
+    partial_rotary_factor=1,
+):
+    """
+    The real implementation of get_rope
+    """
+
+    architecture = model_config.architectures[0]
+    if model_config is not None and model_config is None or architecture.startswith(
+            "Qwen"):
+        rotary_emb_layer = QwenRotaryEmbedding(rotary_dim, base,
+                                               partial_rotary_factor)
+        rotary_emb = rotary_emb_layer(position_ids)
+    else:
+        rotary_emb_layer = ErnieRotaryEmbedding(rotary_dim, base,
+                                                partial_rotary_factor)
+        rotary_emb = rotary_emb_layer(position_ids)
+    return rotary_emb
+
+
+def get_rope_xpu(
+    rotary_dim: int,
+    base: 10000.0,
+    position_ids,
+    model_config: ModelConfig,
+    partial_rotary_factor=1,
+):
+    """
+    In XPU, cos and sin compute must be done on cpu
+    """
+    with CpuGuard():
+        position_ids = position_ids.cpu()
+        rotary_emb = get_rope_impl(rotary_dim, base, position_ids,
+                                   model_config, partial_rotary_factor)
+        return rotary_emb.to('xpu')
+
+
 def get_rope(
    rotary_dim: int,
    base: 10000.0,
    position_ids,
+    model_config: ModelConfig,
    partial_rotary_factor=1,
-    rope_scaling: Optional[dict[str, Any]] = None,
 ):
-    rope_type = rope_scaling.get("architectures", None)
-    if "Qwen2ForCausalLM" in rope_type:
-        rotary_emb_layer = QwenRotaryEmbedding(rotary_dim, base,
-                                               partial_rotary_factor,
-                                               rope_scaling)
-        rotary_emb = rotary_emb_layer(position_ids)
+    """
+    The warpper of get_rope
+    """
+    if current_platform.is_xpu():
+        return get_rope_xpu(rotary_dim, base, position_ids, model_config,
+                            partial_rotary_factor)
    else:
-        rotary_emb_layer = ErnieRotaryEmbedding(rotary_dim, base,
-                                                partial_rotary_factor,
-                                                rope_scaling)
-        rotary_emb = rotary_emb_layer(position_ids)
-    return rotary_emb
+        return get_rope_impl(rotary_dim, base, position_ids, model_config,
+                             partial_rotary_factor)


 class ErnieVlRotaryEmbedding3D:

    def __init__(self, rotary_dim, base, partial_rotary_factor, max_position,
-                 freq_allocation, rope_scaling):
+                 freq_allocation):
        self.rotary_dim = rotary_dim
        self.base = base
        self.paritial_rotary_factor = partial_rotary_factor
-        self.rope_scaling = rope_scaling
        self.max_position = max_position
        self.freq_allocation = freq_allocation

@@ -223,12 +255,10 @@ def get_rope_3d(
    paritial_rotary_factor: 1,
    max_position: 131072,
    freq_allocation: 2,
-    rope_scaling: Optional[dict[str, Any]] = None,
 ):
    rotary_emb3d_layer = ErnieVlRotaryEmbedding3D(rotary_dim, base,
                                                  paritial_rotary_factor,
                                                  max_position,
-                                                  freq_allocation,
-                                                  rope_scaling)
+                                                  freq_allocation)
    rotary_emb_3d = rotary_emb3d_layer(position_ids)
    return rotary_emb_3d