Global scheduler supports hot configuration updates (#2807)

* Check if the controller port is available

* Global scheduler supports hot configuration updates

* add interface: /controller/scheduler

Author: lddfym
Date: 2025-07-11 13:38:07 +08:00
Committed by: GitHub
Parent: abbbd0cddc
Commit: b5e4288704
6 changed files with 207 additions and 108 deletions

fastdeploy/entrypoints/openai/api_server.py

@@ -32,7 +32,8 @@ from fastdeploy.entrypoints.openai.protocol import (ChatCompletionRequest,
                                                     ChatCompletionResponse,
                                                     CompletionRequest,
                                                     CompletionResponse,
-                                                    ErrorResponse)
+                                                    ErrorResponse,
+                                                    ControlSchedulerRequest)
 from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
 from fastdeploy.entrypoints.openai.serving_completion import \
     OpenAIServingCompletion
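Note: `ControlSchedulerRequest` itself is defined in `fastdeploy/entrypoints/openai/protocol.py`, one of the six changed files not shown here. A minimal sketch of what it needs to contain, assuming a pydantic v2 `BaseModel` like the other protocol types (the field names are taken from the endpoint below; the defaults are assumptions):

# Hypothetical sketch; only the three fields read by control_scheduler() below.
from typing import Optional

from pydantic import BaseModel


class ControlSchedulerRequest(BaseModel):
    """Body of POST /controller/scheduler."""
    reset: bool = False                    # assumed default: don't reset unless asked
    load_shards_num: Optional[int] = None  # new shard count for the global scheduler
    reallocate_shard: bool = False         # whether to reallocate shards across nodes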
@@ -279,7 +280,7 @@ def launch_api_server() -> None:
     """
     if not is_port_available(args.host, args.port):
         raise Exception(f"The parameter `port`:{args.port} is already in use.")
     api_server_logger.info(
         f"launch Fastdeploy api server... port: {args.port}")
     api_server_logger.info(f"args: {args.__dict__}")
@@ -326,7 +327,7 @@ def launch_metrics_server():
         raise Exception(
             f"The parameter `metrics_port`:{args.metrics_port} is already in use."
         )
     prom_dir = cleanup_prometheus_files(True)
     os.environ["PROMETHEUS_MULTIPROC_DIR"] = prom_dir
     metrics_server_thread = threading.Thread(target=run_metrics_server,
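For context on the `PROMETHEUS_MULTIPROC_DIR` lines kept above: `prometheus_client` supports multi-process setups by writing per-process metric files into that directory, so the variable has to be set before worker processes start. Reading them back uses the standard `prometheus_client` multiprocess API (this snippet is illustrative, not part of the commit):

# Standard prometheus_client multiprocess collection, shown for context only.
from prometheus_client import CollectorRegistry, generate_latest, multiprocess

registry = CollectorRegistry()
# Merges the per-process metric files under PROMETHEUS_MULTIPROC_DIR
# into one registry so a single /metrics endpoint can serve them.
multiprocess.MultiProcessCollector(registry)
print(generate_latest(registry).decode())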
@@ -347,10 +348,39 @@ def reset_scheduler():
     if llm_engine is None:
         return Response("Engine not loaded", status_code=500)
-    llm_engine.reset_scheduler()
+    llm_engine.scheduler.reset_scheduler()
     return Response("Scheduler Reset Successfully", status_code=200)
+
+
+@controller_app.post("/controller/scheduler")
+def control_scheduler(request: ControlSchedulerRequest):
+    """
+    Control the scheduler behavior with the given parameters.
+    """
+    content = ErrorResponse(object="", message="Scheduler updated successfully", code=0)
+
+    global llm_engine
+    if llm_engine is None:
+        content.message = "Engine is not loaded"
+        content.code = 500
+        return JSONResponse(content=content.model_dump(), status_code=500)
+
+    if request.reset:
+        llm_engine.scheduler.reset_scheduler()
+
+    if request.load_shards_num or request.reallocate_shard:
+        if hasattr(llm_engine.scheduler, "update_config") and callable(llm_engine.scheduler.update_config):
+            llm_engine.scheduler.update_config(
+                load_shards_num=request.load_shards_num,
+                reallocate=request.reallocate_shard)
+        else:
+            content.message = "This scheduler doesn't support the `update_config()` method."
+            content.code = 400
+            return JSONResponse(content=content.model_dump(), status_code=400)
+
+    return JSONResponse(content=content.model_dump(), status_code=200)
 
 
 def run_controller_server():
     """
     run controller server
     """
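Taken together, a hot update of the global scheduler can be driven over plain HTTP once the controller server is up. A usage sketch (127.0.0.1:9000 stands in for host:controller_port; the JSON field names mirror `ControlSchedulerRequest` as used above):

# Usage sketch for the new endpoint; host and port are placeholders.
import requests

# Reset the scheduler and resize it to 4 shards in one call.
resp = requests.post(
    "http://127.0.0.1:9000/controller/scheduler",
    json={"reset": True, "load_shards_num": 4, "reallocate_shard": True},
)
print(resp.status_code, resp.json()["message"])
# 200 on success, 400 if the scheduler lacks update_config(), 500 if the engine is not loaded.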
@@ -371,6 +401,11 @@ def launch_controller_server():
             f"The parameter `controller_port`:{args.controller_port} is already in use."
         )
+    if not is_port_available(args.host, args.controller_port):
+        raise Exception(
+            f"The parameter `controller_port`:{args.controller_port} is already in use."
+        )
+
     controller_server_thread = threading.Thread(target=run_controller_server,
                                                 daemon=True)
     controller_server_thread.start()
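`is_port_available` is an existing FastDeploy helper and is not part of this diff; a check like this is usually a plain bind probe, roughly:

# Rough sketch of a bind-probe check; not the actual FastDeploy implementation.
import socket


def is_port_available(host: str, port: int) -> bool:
    """Return True if (host, port) can be bound, i.e. nothing is listening there."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        try:
            sock.bind((host, port))
            return True
        except OSError:
            return False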