diff --git a/fastdeploy/entrypoints/openai/api_server.py b/fastdeploy/entrypoints/openai/api_server.py index 71e6f8045..48e02ab46 100644 --- a/fastdeploy/entrypoints/openai/api_server.py +++ b/fastdeploy/entrypoints/openai/api_server.py @@ -497,6 +497,7 @@ def control_scheduler(request: ControlSchedulerRequest): return JSONResponse(content=content.model_dump(), status_code=500) if request.reset: + llm_engine.engine.clear_data() llm_engine.engine.scheduler.reset() if request.load_shards_num or request.reallocate_shard: diff --git a/fastdeploy/rl/dynamic_weight_manager.py b/fastdeploy/rl/dynamic_weight_manager.py index d43ca3017..e3eea99a6 100644 --- a/fastdeploy/rl/dynamic_weight_manager.py +++ b/fastdeploy/rl/dynamic_weight_manager.py @@ -257,6 +257,7 @@ class DynamicWeightManager: while model_weights_status.value[0] != 0: if model_weights_status.value[0] == 1: logger.info("infer engine stopped! start to load new checkpoint...") + model_runner.clear_requests() model_runner.update_parameters(pid) elif model_weights_status.value[0] == -1: logger.info("infer engine stopped! start to clear checkpoint...") diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py index 9136a8ba8..666c90672 100644 --- a/fastdeploy/worker/worker_process.py +++ b/fastdeploy/worker/worker_process.py @@ -315,6 +315,8 @@ class PaddleDisWorkerProc: self.worker.model_runner, self.parallel_config.engine_worker_queue_port, ) + logger.info(f"current task queue data: {self.task_queue.num_tasks()}") + self.task_queue.clear_data() self.model_weights_signal[0] = 0 logger.info(f"Rank: {self.local_rank} has updated or cleared parameters.")