[Iluvatar GPU] Optimize attention and MoE performance (#3234)

This commit is contained in:
yzwu
2025-08-08 10:51:24 +08:00
committed by GitHub
parent 37569cca86
commit fbdd6b0663
24 changed files with 1130 additions and 1653 deletions

View File

@@ -128,10 +128,16 @@ def rejection_top_p_sampling(
rejection_top_p_sampling
"""
try:
from fastdeploy.model_executor.ops.gpu import (
rejection_top_p_sampling,
top_k_renorm_probs,
)
if current_platform.is_iluvatar():
from fastdeploy.model_executor.ops.iluvatar import (
rejection_top_p_sampling,
top_k_renorm_probs,
)
else:
from fastdeploy.model_executor.ops.gpu import (
rejection_top_p_sampling,
top_k_renorm_probs,
)
if paddle.count_nonzero(top_k) == 0:
ids = rejection_top_p_sampling(