[RL] Support Rollout Routing Replay (#5321)

* [RL] Support Rollout Routing Replay

* add routing indices cache

* fix config bug and moe forward bug

* R3 Support GLM

* support eb4.5

* fix merge bug

* Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Apply suggestion from @Copilot

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* add routing replay ci

* support glm topk

* support other top_k

* fix ci bug

* pre-commit

* only support chatcmpl

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yuanle Liu <yuanlehome@163.com>
This commit is contained in:
RAM
2025-12-05 20:01:33 +08:00
committed by GitHub
parent 8545b705ed
commit 96d2d4877b
24 changed files with 592 additions and 24 deletions

View File

@@ -35,6 +35,7 @@ from fastdeploy.config import (
PlasAttentionConfig,
PoolerConfig,
RouterConfig,
RoutingReplayConfig,
RunnerOption,
SpeculativeConfig,
StructuredOutputsConfig,
@@ -491,6 +492,11 @@ class EngineArgs:
Configuration for eplb.
"""
routing_replay_config: Optional[Dict[str, Any]] = None
"""
Configuration for rollout routing replay (R3).
"""
def __post_init__(self):
"""
Post-initialization processing to set default tokenizer if not provided.
@@ -882,6 +888,12 @@ class EngineArgs:
default=EngineArgs.eplb_config,
help="Config of eplb.",
)
parallel_group.add_argument(
"--routing-replay-config",
type=json.loads,
default=EngineArgs.routing_replay_config,
help="Flag of rollout routing replay(r3).",
)
parallel_group.add_argument(
"--enable-chunked-moe",
action="store_true",
@@ -1235,6 +1247,14 @@ class EngineArgs:
eplb_args["enable_eplb"] = self.enable_eplb
return EPLBConfig(eplb_args)
def create_routing_repaly_config(self) -> RoutingReplayConfig:
    """
    Create and return a RoutingReplayConfig object.

    The argument dict starts as ``asdict(self)``; when
    ``self.routing_replay_config`` is not None, each of its key/value
    pairs is written on top, replacing any same-named entry.
    """
    merged_args = asdict(self)
    overrides = self.routing_replay_config
    if overrides is None:
        # No user-supplied overrides: build from the dataclass fields alone.
        return RoutingReplayConfig(merged_args)
    # User-supplied entries take precedence over the dataclass fields.
    merged_args.update(overrides.items())
    return RoutingReplayConfig(merged_args)
def create_engine_config(self, port_availability_check=True) -> FDConfig:
"""
Create and return a Config object based on the current settings.
@@ -1278,6 +1298,7 @@ class EngineArgs:
graph_opt_cfg = self.create_graph_optimization_config()
plas_attention_config = self.create_plas_attention_config()
eplb_cfg = self.create_eplb_config()
routing_replay_config = self.create_routing_repaly_config()
router_config = RouterConfig(all_dict)
early_stop_cfg = self.create_early_stop_config()
@@ -1310,4 +1331,5 @@ class EngineArgs:
graph_opt_config=graph_opt_cfg,
plas_attention_config=plas_attention_config,
early_stop_config=early_stop_cfg,
routing_replay_config=routing_replay_config,
)