Sync v2.0 version of code to github repo

This commit is contained in:
Jiang-Jia-Jun
2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions

View File

@@ -14,25 +14,25 @@
# limitations under the License.
"""
from typing import Any, Optional
from typing import Optional
import paddle
from fastdeploy.config import ModelConfig
from fastdeploy.platforms import current_platform
from .utils import CpuGuard
class ErnieRotaryEmbedding:
def __init__(self,
rotary_dim,
base,
partial_rotary_factor,
rope_scaling=None):
def __init__(self, rotary_dim, base, partial_rotary_factor):
"""
Pre-calculate rotary position embedding for position_ids.
"""
self.rotary_dim = rotary_dim
self.base = base
self.partial_rotary_factor = partial_rotary_factor
self.rope_scaling = rope_scaling
def __call__(self, position_ids):
bsz, max_seq_len = position_ids.shape[:2]
@@ -70,18 +70,13 @@ class ErnieRotaryEmbedding:
class QwenRotaryEmbedding:
def __init__(self,
rotary_dim,
base,
partial_rotary_factor,
rope_scaling=None):
def __init__(self, rotary_dim, base, partial_rotary_factor):
"""
Pre-calculate rotary position embedding for position_ids.
"""
self.rotary_dim = rotary_dim
self.base = base
self.partial_rotary_factor = partial_rotary_factor
self.rope_scaling = rope_scaling
def __call__(self, position_ids):
bsz, max_seq_len = position_ids.shape[:2]
@@ -104,35 +99,72 @@ class QwenRotaryEmbedding:
return rot_emb
def get_rope_impl(
rotary_dim: int,
base: 10000.0,
position_ids,
model_config: Optional[ModelConfig] = None,
partial_rotary_factor=1,
):
"""
The real implementation of get_rope
"""
architecture = model_config.architectures[0]
if model_config is not None and model_config is None or architecture.startswith(
"Qwen"):
rotary_emb_layer = QwenRotaryEmbedding(rotary_dim, base,
partial_rotary_factor)
rotary_emb = rotary_emb_layer(position_ids)
else:
rotary_emb_layer = ErnieRotaryEmbedding(rotary_dim, base,
partial_rotary_factor)
rotary_emb = rotary_emb_layer(position_ids)
return rotary_emb
def get_rope_xpu(
rotary_dim: int,
base: 10000.0,
position_ids,
model_config: ModelConfig,
partial_rotary_factor=1,
):
"""
In XPU, cos and sin compute must be done on cpu
"""
with CpuGuard():
position_ids = position_ids.cpu()
rotary_emb = get_rope_impl(rotary_dim, base, position_ids,
model_config, partial_rotary_factor)
return rotary_emb.to('xpu')
def get_rope(
rotary_dim: int,
base: 10000.0,
position_ids,
model_config: ModelConfig,
partial_rotary_factor=1,
rope_scaling: Optional[dict[str, Any]] = None,
):
rope_type = rope_scaling.get("architectures", None)
if "Qwen2ForCausalLM" in rope_type:
rotary_emb_layer = QwenRotaryEmbedding(rotary_dim, base,
partial_rotary_factor,
rope_scaling)
rotary_emb = rotary_emb_layer(position_ids)
"""
The warpper of get_rope
"""
if current_platform.is_xpu():
return get_rope_xpu(rotary_dim, base, position_ids, model_config,
partial_rotary_factor)
else:
rotary_emb_layer = ErnieRotaryEmbedding(rotary_dim, base,
partial_rotary_factor,
rope_scaling)
rotary_emb = rotary_emb_layer(position_ids)
return rotary_emb
return get_rope_impl(rotary_dim, base, position_ids, model_config,
partial_rotary_factor)
class ErnieVlRotaryEmbedding3D:
def __init__(self, rotary_dim, base, partial_rotary_factor, max_position,
freq_allocation, rope_scaling):
freq_allocation):
self.rotary_dim = rotary_dim
self.base = base
self.paritial_rotary_factor = partial_rotary_factor
self.rope_scaling = rope_scaling
self.max_position = max_position
self.freq_allocation = freq_allocation
@@ -223,12 +255,10 @@ def get_rope_3d(
paritial_rotary_factor: 1,
max_position: 131072,
freq_allocation: 2,
rope_scaling: Optional[dict[str, Any]] = None,
):
rotary_emb3d_layer = ErnieVlRotaryEmbedding3D(rotary_dim, base,
paritial_rotary_factor,
max_position,
freq_allocation,
rope_scaling)
freq_allocation)
rotary_emb_3d = rotary_emb3d_layer(position_ids)
return rotary_emb_3d