mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
Global scheduler supports configuring hot updates (#2807)
* Check if the controller port is available * Global scheduler supports configuring hot updates * add interface: /controller/scheduler * add interface: /controller/scheduler
This commit is contained in:
@@ -32,7 +32,8 @@ from fastdeploy.entrypoints.openai.protocol import (ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
ErrorResponse)
|
||||
ErrorResponse,
|
||||
ControlSchedulerRequest)
|
||||
from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
|
||||
from fastdeploy.entrypoints.openai.serving_completion import \
|
||||
OpenAIServingCompletion
|
||||
@@ -279,7 +280,7 @@ def launch_api_server() -> None:
|
||||
"""
|
||||
if not is_port_available(args.host, args.port):
|
||||
raise Exception(f"The parameter `port`:{args.port} is already in use.")
|
||||
|
||||
|
||||
api_server_logger.info(
|
||||
f"launch Fastdeploy api server... port: {args.port}")
|
||||
api_server_logger.info(f"args: {args.__dict__}")
|
||||
@@ -326,7 +327,7 @@ def launch_metrics_server():
|
||||
raise Exception(
|
||||
f"The parameter `metrics_port`:{args.metrics_port} is already in use."
|
||||
)
|
||||
|
||||
|
||||
prom_dir = cleanup_prometheus_files(True)
|
||||
os.environ["PROMETHEUS_MULTIPROC_DIR"] = prom_dir
|
||||
metrics_server_thread = threading.Thread(target=run_metrics_server,
|
||||
@@ -347,10 +348,39 @@ def reset_scheduler():
|
||||
|
||||
if llm_engine is None:
|
||||
return Response("Engine not loaded", status_code=500)
|
||||
llm_engine.reset_scheduler()
|
||||
llm_engine.scheduler.reset_scheduler()
|
||||
return Response("Scheduler Reset Successfully", status_code=200)
|
||||
|
||||
|
||||
@controller_app.post("/controller/scheduler")
|
||||
def control_scheduler(request: ControlSchedulerRequest):
|
||||
"""
|
||||
Control the scheduler behavior with the given parameters.
|
||||
"""
|
||||
content = ErrorResponse(object="", message="Scheduler updated successfully", code=0)
|
||||
|
||||
global llm_engine
|
||||
if llm_engine is None:
|
||||
content.message = "Engine is not loaded"
|
||||
content.code = 500
|
||||
return JSONResponse(content=content.model_dump(), status_code=500)
|
||||
|
||||
if request.reset:
|
||||
llm_engine.scheduler.reset_scheduler()
|
||||
|
||||
if request.load_shards_num or request.reallocate_shard:
|
||||
if hasattr(llm_engine.scheduler, "update_config") and callable(llm_engine.scheduler.update_config):
|
||||
llm_engine.scheduler.update_config(
|
||||
load_shards_num=request.load_shards_num,
|
||||
reallocate=request.reallocate_shard)
|
||||
else:
|
||||
content.message="This scheduler doesn't support the `update_config()` method."
|
||||
content.code=400
|
||||
return JSONResponse(content=content.model_dump(), status_code=400)
|
||||
|
||||
return JSONResponse(content=content.model_dump(), status_code=200)
|
||||
|
||||
|
||||
def run_controller_server():
|
||||
"""
|
||||
run controller server
|
||||
@@ -371,6 +401,11 @@ def launch_controller_server():
|
||||
f"The parameter `controller_port`:{args.controller_port} is already in use."
|
||||
)
|
||||
|
||||
if not is_port_available(args.host, args.controller_port):
|
||||
raise Exception(
|
||||
f"The parameter `controller_port`:{args.controller_port} is already in use."
|
||||
)
|
||||
|
||||
controller_server_thread = threading.Thread(target=run_controller_server,
|
||||
daemon=True)
|
||||
controller_server_thread.start()
|
||||
|
Reference in New Issue
Block a user