[Feature] support eplb in api_server (#4782)

* support eplb in api_server

* update code

* add eplb test case

* update eplb

* support tp+dp eplb

* update test case

* update code

* update code

* fix bug

* update copilot review

* update test case name
This commit is contained in:
kevin
2025-11-24 20:22:29 +08:00
committed by GitHub
parent d5bd64336a
commit 8e4e3ff510
25 changed files with 2102 additions and 421 deletions

View File

@@ -467,6 +467,16 @@ class EngineArgs:
Url for router server, such as `0.0.0.0:30000`.
"""
enable_eplb: bool = False
"""
Flag to enable eplb
"""
eplb_config: Optional[Dict[str, Any]] = None
"""
Configuration for eplb.
"""
def __post_init__(self):
"""
Post-initialization processing to set default tokenizer if not provided.
@@ -850,6 +860,18 @@ class EngineArgs:
default=EngineArgs.enable_expert_parallel,
help="Enable expert parallelism.",
)
parallel_group.add_argument(
"--enable-eplb",
action="store_true",
default=EngineArgs.enable_eplb,
help="Enable eplb.",
)
parallel_group.add_argument(
"--eplb-config",
type=json.loads,
default=EngineArgs.eplb_config,
help="Config of eplb.",
)
# Load group
load_group = parser.add_argument_group("Load Configuration")
@@ -1126,7 +1148,7 @@ class EngineArgs:
def create_scheduler_config(self) -> SchedulerConfig:
"""
Create and retuan a SchedulerConfig object based on the current settings.
Create and return a SchedulerConfig object based on the current settings.
"""
prefix = "scheduler_"
prefix_len = len(prefix)
@@ -1173,13 +1195,22 @@ class EngineArgs:
early_stop_args[k] = v
return EarlyStopConfig(early_stop_args)
def create_eplb_config(self) -> EPLBConfig:
    """
    Create and return an EPLBConfig object based on the current settings.

    The argument dict starts from every field of this dataclass, is then
    overlaid with the entries of ``self.eplb_config`` (when one is set),
    and finally has ``enable_eplb`` forced to the value of
    ``self.enable_eplb`` so the dedicated flag always wins over any
    ``enable_eplb`` key supplied inside ``eplb_config``.
    """
    overrides = {} if self.eplb_config is None else self.eplb_config
    # Later entries win: base fields < eplb_config overrides < enable_eplb flag.
    merged_args = {
        **asdict(self),
        **overrides,
        "enable_eplb": self.enable_eplb,
    }
    return EPLBConfig(merged_args)
def create_engine_config(self, port_availability_check=True) -> FDConfig:
"""
Create and return a Config object based on the current settings.
"""
all_dict = asdict(self)
eplb_cfg = EPLBConfig()
all_dict["enable_redundant_experts"] = eplb_cfg.enable_redundant_experts
model_cfg = ModelConfig(all_dict)
# XPU currently disable prefix cache for VL model
@@ -1221,6 +1252,7 @@ class EngineArgs:
scheduler_cfg = self.create_scheduler_config()
graph_opt_cfg = self.create_graph_optimization_config()
plas_attention_config = self.create_plas_attention_config()
eplb_cfg = self.create_eplb_config()
router_config = RouterConfig(all_dict)
early_stop_cfg = self.create_early_stop_config()