[Executor] Adjust signal sending order in RL training (#3773) (#4066)

* Adjust processing order

* fix bug

* fix update_parameters bug

* refine code
This commit is contained in:
RAM
2025-09-11 15:41:32 +08:00
committed by GitHub
parent 48f2ab3fb3
commit 63d24b2210
3 changed files with 20 additions and 22 deletions

View File

@@ -1705,25 +1705,27 @@ class GPUModelRunner(ModelRunnerBase):
self.forward_meta.clear_caches()
def clear_parameters(self, pid):
""" " Dynamic model loader use to clear parameters use for RL"""
"""Dynamic model loader use to clear parameters use for RL"""
# Clear CUDAGraph
if self.use_cudagraph:
self.model.clear_grpah_opt_backend()
# Clear parameters and send signal
self.dynamic_weight_manager.clear_parameters(pid)
self.clear_cache()
paddle.device.cuda.empty_cache()
# Clear CudaGraph
if self.use_cudagraph:
self.model.clear_grpah_opt_backend()
self.dynamic_weight_manager._log_memory("dynamic weight manager clear all memory")
def update_parameters(self, pid):
""" " Dynamic model loader use to update parameters use for RL"""
"""Dynamic model loader use to update parameters use for RL"""
# Update parameters
self.dynamic_weight_manager.update_parameters(pid)
self.initialize_kv_cache()
# Recapture CudaGraph
# Recapture CUDAGraph
if self.use_cudagraph:
self.capture_model()
# Send signal
self.dynamic_weight_manager.finalize_update(pid)
self.dynamic_weight_manager._log_memory("dynamic weight manager update all memory")