[Feature] Support eplb for fd (#4599)

* support eplb

* support eplb

---------

Co-authored-by: kevin <chengyf112@gmail.com>
This commit is contained in:
chenjian
2025-11-03 14:08:15 +08:00
committed by GitHub
parent c657f8d16a
commit f83d0cf127
9 changed files with 1527 additions and 0 deletions

View File

@@ -26,6 +26,7 @@ from fastdeploy.config import (
CacheConfig,
ConvertOption,
EarlyStopConfig,
EPLBConfig,
FDConfig,
GraphOptimizationConfig,
LoadConfig,
@@ -1076,6 +1077,8 @@ class EngineArgs:
Create and return a Config object based on the current settings.
"""
all_dict = asdict(self)
eplb_cfg = EPLBConfig()
all_dict["enable_redundant_experts"] = eplb_cfg.enable_redundant_experts
model_cfg = ModelConfig(all_dict)
# XPU currently disables prefix cache for VL models
@@ -1134,6 +1137,7 @@ class EngineArgs:
load_config=load_cfg,
parallel_config=parallel_cfg,
speculative_config=speculative_cfg,
eplb_config=eplb_cfg,
structured_outputs_config=structured_outputs_config,
ips=self.ips,
use_warmup=self.use_warmup,