[NewFeature] Support dp multi api server && Fix some bugs in mixed ep && merge develop (#3598)

* [Feature] update ep

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix queue ports idx

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* fix ci

* Update engine.py

* fix ci

* fix some bugs in mixed ep

* add server fix and op fix

* remove some logs

* fix code style

* ltd fix

* fix

* fix

* fix some bugs

* fix bug

* fix bug

* fix style

* Update config.py

* Update splitwise_connector.py

* Update cache_messager.py

* Update __init__.py

* merge and fix

* Update engine.py

* Update common_engine.py

* Update run_ci_xpu.sh

* Update ernie_processor.py

* Update ernie_processor.py

---------

Co-authored-by: ltd0924 <ltd0924@sina.com>
Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
This commit is contained in:
gaoziyuan
2025-08-26 19:59:02 +08:00
committed by GitHub
parent cbce94a00e
commit 82e64b13e1
24 changed files with 1244 additions and 1200 deletions

View File

@@ -33,7 +33,7 @@ from fastdeploy.entrypoints.openai.protocol import (
ErrorResponse,
UsageInfo,
)
from fastdeploy.utils import api_server_logger, get_host_ip
from fastdeploy.utils import api_server_logger
from fastdeploy.worker.output import LogprobsLists
@@ -42,14 +42,14 @@ class OpenAIServingCompletion:
self.engine_client = engine_client
self.models = models
self.pid = pid
self.master_ip = ips
self.host_ip = get_host_ip()
self.max_waiting_time = max_waiting_time
if self.master_ip is not None:
if isinstance(self.master_ip, list):
self.master_ip = self.master_ip[0]
if ips is not None:
if isinstance(ips, list):
self.master_ip = ips[0]
else:
self.master_ip = self.master_ip.split(",")[0]
self.master_ip = ips.split(",")[0]
else:
self.master_ip = "0.0.0.0"
async def _ensure_connection_manager(self):
"""ensure connection manager initialized"""
@@ -58,18 +58,16 @@ class OpenAIServingCompletion:
self.engine_client.connection_initialized = True
def _check_master(self):
if self.master_ip is None:
return True
if self.host_ip == self.master_ip:
return True
return False
return self.engine_client.is_master
async def create_completion(self, request: CompletionRequest):
"""
Create a completion for the given prompt.
"""
if not self._check_master():
err_msg = f"Only master node can accept completion request, please send request to master node: {self.pod_ips[0]}"
err_msg = (
f"Only master node can accept completion request, please send request to master node: {self.master_ip}"
)
api_server_logger.error(err_msg)
return ErrorResponse(message=err_msg, code=400)
if self.models: