[NewFeature] Support DP multi API server, fix some bugs in mixed EP, and merge develop (#3598)

* [Feature] update ep * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix queue ports idx * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * Update engine.py * fix ci * fix some bug in mixed ep * add server fix and op fix * rm some log * fix code style * ltd fix * fix * fix * fix some bug * fix bug * fix bug * fix style * Update config.py * Update splitwise_connector.py * Update cache_messager.py * Update __init__.py * merge and fix * Update engine.py * Update common_engine.py * Update run_ci_xpu.sh * Update ernie_processor.py * Update ernie_processor.py --------- Co-authored-by: ltd0924 <ltd0924@sina.com> Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
2025-10-05 08:37:06 +08:00 · 2025-08-26 19:59:02 +08:00
parent cbce94a00e
commit 82e64b13e1
24 changed files with 1244 additions and 1200 deletions
--- a/fastdeploy/model_executor/layers/moe/ep.py
+++ b/fastdeploy/model_executor/layers/moe/ep.py
@@ -49,6 +49,7 @@ def get_moe_scores(
    compute moe scores using e_score_correction_bias.
    """
    scores = paddle.nn.functional.sigmoid(gating_output)
+    assert e_score_correction_bias is not None, "e_score_correction_bias is none!"
    scores_with_bias = scores + e_score_correction_bias
    scores, topk_values, topk_idx = noaux_tc(
        scores,
@@ -104,11 +105,12 @@ class DeepEPEngine:

        # In mixed EP mode on a single node, we dynamically switch between
        # high throughput and low latency modes.
+
        if splitwise_role == "mixed":
            self.deepep_engine = deep_ep.Buffer(
                self.group,
                int(2e9),
-                int(5e9),
+                int(6e9),
                low_latency_mode=True,
                num_qps_per_rank=24,
            )
@@ -387,6 +389,7 @@ class EPPrefillRunner(EPRunner):
        *args,
        **kwargs,
    ):
+
        (
            num_tokens_per_rank,
            num_tokens_per_rdma_rank,