mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[PD Disaggregation] Support PD + EP inference for Qwen3-MoE. (#4691)
Support Qwen-MoE with PD disaggregation and expert parallelism (EP).
This commit is contained in:
@@ -853,7 +853,6 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
|
||||
num_experts = model_config.moe_num_experts[0]
|
||||
else:
|
||||
num_experts = model_config.moe_num_experts
|
||||
|
||||
num_experts_per_rank = num_experts // parallel_config.expert_parallel_size
|
||||
num_experts_start_offset = expert_parallel_rank * num_experts_per_rank
|
||||
max_chips_per_node = 16 if current_platform.is_iluvatar() else 8
|
||||
|
||||
Reference in New Issue
Block a user