[BUGFIX] clear request (#4286)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled

* fix

* fix

* fix

* [Feature] support clear data

* update

* fix

* fix

* fix

* fix

* [BugFix] fix clear data

* Update api_server.py

* Update api_server.py

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
ltd0924
2025-09-27 14:08:48 +08:00
committed by GitHub
parent b176cba474
commit f8c6a354a1
3 changed files with 4 additions and 0 deletions

View File

@@ -497,6 +497,7 @@ def control_scheduler(request: ControlSchedulerRequest):
  return JSONResponse(content=content.model_dump(), status_code=500)
  if request.reset:
+ llm_engine.engine.clear_data()
  llm_engine.engine.scheduler.reset()
  if request.load_shards_num or request.reallocate_shard:

View File

@@ -257,6 +257,7 @@ class DynamicWeightManager:
  while model_weights_status.value[0] != 0:
  if model_weights_status.value[0] == 1:
  logger.info("infer engine stopped! start to load new checkpoint...")
+ model_runner.clear_requests()
  model_runner.update_parameters(pid)
  elif model_weights_status.value[0] == -1:
  logger.info("infer engine stopped! start to clear checkpoint...")

View File

@@ -315,6 +315,8 @@ class PaddleDisWorkerProc:
  self.worker.model_runner,
  self.parallel_config.engine_worker_queue_port,
  )
+ logger.info(f"current task queue data: {self.task_queue.num_tasks()}")
+ self.task_queue.clear_data()
  self.model_weights_signal[0] = 0
  logger.info(f"Rank: {self.local_rank} has updated or cleared parameters.")