[NewFeature] add ep rollout model init and update/clear ep buffer (#3927)

* add ep rollout model init && add deep update/clear * fix test
2025-10-15 05:01:00 +08:00 · 2025-09-12 14:15:13 +08:00
parent c64ceac34d
commit 10768a4d79
13 changed files with 364 additions and 304 deletions
--- a/fastdeploy/model_executor/layers/moe/moe.py
+++ b/fastdeploy/model_executor/layers/moe/moe.py
@@ -27,6 +27,11 @@ from fastdeploy.model_executor.utils import slice_fn
 from fastdeploy.platforms import current_platform
 from fastdeploy.worker.experts_manager import RedundantExpertManger

+try:
+    from fastdeploy.model_executor.ops.gpu import noaux_tc
+except:
+    logger.warning("import noaux_tc Failed!")
+

 def get_moe_method():
    """
@@ -54,6 +59,31 @@ def get_moe_method():
    raise NotImplementedError


+def get_moe_scores(
+    gating_output: paddle.Tensor,
+    n_group,
+    topk_group,
+    top_k,
+    routed_scaling_factor,
+    e_score_correction_bias,
+) -> paddle.Tensor:
+    """
+    compute moe scores using e_score_correction_bias.
+    """
+    scores = paddle.nn.functional.sigmoid(gating_output)
+    assert e_score_correction_bias is not None, "e_score_correction_bias is none!"
+    scores_with_bias = scores + e_score_correction_bias
+    scores, topk_values, topk_idx = noaux_tc(
+        scores,
+        scores_with_bias,
+        n_group if n_group > 0 else 1,
+        topk_group if topk_group > 0 else 1,
+        top_k,
+        routed_scaling_factor,
+    )
+    return scores, topk_values, topk_idx
+
+
 class FusedMoE(nn.Layer):
    """
    FusedMoE is a layer that performs MoE (Mixture of Experts) computation.