diff --git a/fastdeploy/entrypoints/openai/api_server.py b/fastdeploy/entrypoints/openai/api_server.py index 16d6e735b..f31cf32d4 100644 --- a/fastdeploy/entrypoints/openai/api_server.py +++ b/fastdeploy/entrypoints/openai/api_server.py @@ -487,7 +487,7 @@ def reset_scheduler(): if llm_engine is None: return Response("Engine not loaded", status_code=500) - llm_engine.scheduler.reset() + llm_engine.engine.scheduler.reset() return Response("Scheduler Reset Successfully", status_code=200) @@ -505,11 +505,13 @@ def control_scheduler(request: ControlSchedulerRequest): return JSONResponse(content=content.model_dump(), status_code=500) if request.reset: - llm_engine.scheduler.reset() + llm_engine.engine.scheduler.reset() if request.load_shards_num or request.reallocate_shard: - if hasattr(llm_engine.scheduler, "update_config") and callable(llm_engine.scheduler.update_config): - llm_engine.scheduler.update_config( + if hasattr(llm_engine.engine.scheduler, "update_config") and callable( + llm_engine.engine.scheduler.update_config + ): + llm_engine.engine.scheduler.update_config( load_shards_num=request.load_shards_num, reallocate=request.reallocate_shard, )