[LLM] support multi node deploy (#2708)

* [LLM] support multi node deploy

* Update engine.py

* fix bugs

* fix

* [LLM] support multi node deploy

* [LLM] support multi node deploy

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Authored by ltd0924 on 2025-07-06 10:33:51 +08:00, committed by GitHub
parent 04a8e1ef2b
commit 68b4755587
13 changed files with 157 additions and 87 deletions


@@ -39,19 +39,32 @@ from fastdeploy.entrypoints.openai.protocol import (
     ToolCall,
     FunctionCall
 )
-from fastdeploy.utils import api_server_logger
+from fastdeploy.utils import api_server_logger, get_host_ip
 from fastdeploy.engine.request import RequestOutput
 
 
 class OpenAIServingCompletion:
-    def __init__(self, engine_client, pid):
+    def __init__(self, engine_client, pid, pod_ips):
         self.engine_client = engine_client
         self.pid = pid
+        self.pod_ips = pod_ips
+        self.host_ip = get_host_ip()
+
+    def _check_master(self):
+        if self.pod_ips is None:
+            return True
+        if self.host_ip == self.pod_ips[0]:
+            return True
+        return False
 
     async def create_completion(self, request: CompletionRequest):
         """
         Create a completion for the given prompt.
         """
+        if not self._check_master():
+            err_msg = f"Only master node can accept completion request, please send request to master node: {self.pod_ips[0]}"
+            api_server_logger.error(err_msg)
+            return ErrorResponse(message=err_msg, code=400)
         created_time = int(time.time())
         if request.user is not None:
             request_id = f"cmpl-{request.user}-{uuid.uuid4()}"
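
The change boils down to a master-node gate: each serving object records the deployment's pod IP list, treats the first entry as the master, and rejects completion requests on any other node with a 400 that tells the client where to resend. Below is a minimal, self-contained sketch of that pattern, for illustration only; get_local_ip is a hypothetical stand-in for fastdeploy.utils.get_host_ip (whose actual implementation may differ), using the common UDP-connect trick to discover the outward-facing address.

    # Illustrative sketch of the master-node gate; not FastDeploy source.
    import socket

    def get_local_ip() -> str:
        # Hypothetical stand-in for fastdeploy.utils.get_host_ip: "connecting"
        # a UDP socket to a public address selects the outward-facing
        # interface without actually sending any traffic.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))
            return s.getsockname()[0]

    def is_master(pod_ips, host_ip) -> bool:
        # Mirrors _check_master: a single-node deployment (pod_ips is None)
        # always accepts; in a multi-node deployment only the first pod IP
        # is the master.
        if pod_ips is None:
            return True
        return host_ip == pod_ips[0]

    # A worker node rejects the request and points the client at the master:
    pod_ips = ["10.0.0.1", "10.0.0.2"]  # example multi-node deployment
    host_ip = get_local_ip()
    if not is_master(pod_ips, host_ip):
        print(f"Only master node can accept completion request, "
              f"please send request to master node: {pod_ips[0]}")

Keeping the check inside the serving layer (rather than at a load balancer) means every node can run the same API server binary, and misrouted requests fail fast with an actionable error instead of being silently processed on a node without the full engine state.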