[PD Disaggregation] Support PD + EP inference for Qwen3-MoE. (#4691)

Support Qwen3-MoE inference with PD disaggregation and expert parallelism (EP).
This commit is contained in:
K11OntheBoat
2025-11-06 10:32:15 +08:00
committed by GitHub
parent e8c3e20ee6
commit 62dfad4a5f
10 changed files with 93 additions and 74 deletions

View File

@@ -853,7 +853,6 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
num_experts = model_config.moe_num_experts[0]
else:
num_experts = model_config.moe_num_experts
num_experts_per_rank = num_experts // parallel_config.expert_parallel_size
num_experts_start_offset = expert_parallel_rank * num_experts_per_rank
max_chips_per_node = 16 if current_platform.is_iluvatar() else 8