mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[New][RL] Support Rollout Routing Replay (#5405)
* [RL] Support Rollout Routing Replay
* add routing indices cache
* fix config bug and moe forward bug
* R3 Support GLM
* support eb4.5
* fix merge bug
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* Apply suggestion from @Copilot
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* add routing replay ci
* support glm topk
* support other top_k
* fix ci bug
* pre-commit
* only support chatcmpl
* Revert "Revert "[RL] Support Rollout Routing Replay (#5321)" (#5402)"
This reverts commit c45e064f3d.
* Fix XPU and NPU bug
---------
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Yuanle Liu <yuanlehome@163.com>
This commit is contained in:
@@ -90,7 +90,7 @@ class MockAttentionBackend:
|
||||
|
||||
|
||||
class MockQuantMethod:
|
||||
def apply(self, layer, x, gate):
|
||||
def apply(self, layer, x, gate, topk_ids_hookfunc=None):
|
||||
return x
|
||||
|
||||
|
||||
@@ -129,6 +129,7 @@ class TestChunkedMoE(unittest.TestCase):
|
||||
model_runner.speculative_decoding = False
|
||||
model_runner._init_share_inputs(mock_fd_config.scheduler_config.max_num_seqs)
|
||||
model_runner.share_inputs["caches"] = None
|
||||
model_runner.routing_replay_manager = None
|
||||
|
||||
if dist.get_rank() == 0:
|
||||
model_runner.share_inputs["ids_remove_padding"] = paddle.ones([10])
|
||||
@@ -148,6 +149,7 @@ class TestChunkedMoE(unittest.TestCase):
|
||||
|
||||
fused_moe.fd_config = mock_fd_config
|
||||
fused_moe.quant_method = MockQuantMethod()
|
||||
fused_moe.enable_routing_replay = None
|
||||
return fused_moe
|
||||
|
||||
def run_model_runner(self):
|
||||
|
||||
Reference in New Issue
Block a user