mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[New][RL] Support Rollout Routing Replay (#5405)
* [RL] Support Rollout Routing Replay
* add routing indices cache
* fix config bug and moe forward bug
* R3 Support GLM
* support eb4.5
* fix merge bug
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* add routing replay ci
* support glm topk
* support other top_k
* fix ci bug
* pre-commit
* only support chatcmpl
* Revert "Revert "[RL] Support Rollout Routing Replay (#5321)" (#5402)"
This reverts commit c45e064f3d.
* Fix XPU and NPU bug
---------
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yuanle Liu <yuanlehome@163.com>
This commit is contained in:
@@ -35,6 +35,7 @@ from fastdeploy.config import (
|
||||
PlasAttentionConfig,
|
||||
PoolerConfig,
|
||||
RouterConfig,
|
||||
RoutingReplayConfig,
|
||||
RunnerOption,
|
||||
SpeculativeConfig,
|
||||
StructuredOutputsConfig,
|
||||
@@ -491,6 +492,11 @@ class EngineArgs:
|
||||
Configuration for eplb.
|
||||
"""
|
||||
|
||||
routing_replay_config: Optional[Dict[str, Any]] = None
|
||||
"""
|
||||
Configuration for rollout routing replay (R3).
|
||||
"""
|
||||
|
||||
def __post_init__(self):
|
||||
"""
|
||||
Post-initialization processing to set default tokenizer if not provided.
|
||||
@@ -882,6 +888,12 @@ class EngineArgs:
|
||||
default=EngineArgs.eplb_config,
|
||||
help="Config of eplb.",
|
||||
)
|
||||
parallel_group.add_argument(
|
||||
"--routing-replay-config",
|
||||
type=json.loads,
|
||||
default=EngineArgs.routing_replay_config,
|
||||
help="Flag of rollout routing replay(r3).",
|
||||
)
|
||||
parallel_group.add_argument(
|
||||
"--enable-chunked-moe",
|
||||
action="store_true",
|
||||
@@ -1235,6 +1247,14 @@ class EngineArgs:
|
||||
eplb_args["enable_eplb"] = self.enable_eplb
|
||||
return EPLBConfig(eplb_args)
|
||||
|
||||
def create_routing_repaly_config(self) -> RoutingReplayConfig:
    """
    Build the RoutingReplayConfig for these engine arguments.

    The argument dict starts as a copy of every field of this dataclass
    (via ``asdict``). When ``routing_replay_config`` is set, its entries
    are written on top, so a key supplied there replaces a same-named field.
    """
    merged_args = asdict(self)
    overrides = self.routing_replay_config
    if overrides is not None:
        # Overlay the user-provided entries onto the field defaults.
        merged_args.update(overrides.items())
    return RoutingReplayConfig(merged_args)
|
||||
|
||||
def create_engine_config(self, port_availability_check=True) -> FDConfig:
|
||||
"""
|
||||
Create and return a Config object based on the current settings.
|
||||
@@ -1278,6 +1298,7 @@ class EngineArgs:
|
||||
graph_opt_cfg = self.create_graph_optimization_config()
|
||||
plas_attention_config = self.create_plas_attention_config()
|
||||
eplb_cfg = self.create_eplb_config()
|
||||
routing_replay_config = self.create_routing_repaly_config()
|
||||
router_config = RouterConfig(all_dict)
|
||||
|
||||
early_stop_cfg = self.create_early_stop_config()
|
||||
@@ -1310,4 +1331,5 @@ class EngineArgs:
|
||||
graph_opt_config=graph_opt_cfg,
|
||||
plas_attention_config=plas_attention_config,
|
||||
early_stop_config=early_stop_cfg,
|
||||
routing_replay_config=routing_replay_config,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user