[PD Disaggregation] Support PD + EP inference for Qwen3-MoE. (#4691)

Support PD/EP inference for Qwen-MoE.
2025-12-24 13:28:13 +08:00 · 2025-11-06 10:32:15 +08:00
parent e8c3e20ee6
commit 62dfad4a5f
10 changed files with 93 additions and 74 deletions
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -853,7 +853,6 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
            num_experts = model_config.moe_num_experts[0]
        else:
            num_experts = model_config.moe_num_experts
-
        num_experts_per_rank = num_experts // parallel_config.expert_parallel_size
        num_experts_start_offset = expert_parallel_rank * num_experts_per_rank
        max_chips_per_node = 16 if current_platform.is_iluvatar() else 8