mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[NewFeature]Support dp multi api server && Fix some bug in mixed ep && merge develop (#3598)
* [Feature] update ep * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix queue ports idx * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * fix ci * Update engine.py * fix ci * fix some bug in mixed ep * add server fix and op fix * rm some log * fix code style * ltd fix * fix * fix * fix some bug * fix bug * fix bug * fix style * Update config.py * Update splitwise_connector.py * Update cache_messager.py * Update __init__.py * merge and fix * Update engine.py * Update common_engine.py * Update run_ci_xpu.sh * Update ernie_processor.py * Update ernie_processor.py --------- Co-authored-by: ltd0924 <ltd0924@sina.com> Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
This commit is contained in:
@@ -33,7 +33,7 @@ from fastdeploy.entrypoints.openai.protocol import (
|
||||
ErrorResponse,
|
||||
UsageInfo,
|
||||
)
|
||||
from fastdeploy.utils import api_server_logger, get_host_ip
|
||||
from fastdeploy.utils import api_server_logger
|
||||
from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
|
||||
@@ -42,14 +42,14 @@ class OpenAIServingCompletion:
|
||||
self.engine_client = engine_client
|
||||
self.models = models
|
||||
self.pid = pid
|
||||
self.master_ip = ips
|
||||
self.host_ip = get_host_ip()
|
||||
self.max_waiting_time = max_waiting_time
|
||||
if self.master_ip is not None:
|
||||
if isinstance(self.master_ip, list):
|
||||
self.master_ip = self.master_ip[0]
|
||||
if ips is not None:
|
||||
if isinstance(ips, list):
|
||||
self.master_ip = ips[0]
|
||||
else:
|
||||
self.master_ip = self.master_ip.split(",")[0]
|
||||
self.master_ip = ips.split(",")[0]
|
||||
else:
|
||||
self.master_ip = "0.0.0.0"
|
||||
|
||||
async def _ensure_connection_manager(self):
|
||||
"""ensure connection manager initialized"""
|
||||
@@ -58,18 +58,16 @@ class OpenAIServingCompletion:
|
||||
self.engine_client.connection_initialized = True
|
||||
|
||||
def _check_master(self):
|
||||
if self.master_ip is None:
|
||||
return True
|
||||
if self.host_ip == self.master_ip:
|
||||
return True
|
||||
return False
|
||||
return self.engine_client.is_master
|
||||
|
||||
async def create_completion(self, request: CompletionRequest):
|
||||
"""
|
||||
Create a completion for the given prompt.
|
||||
"""
|
||||
if not self._check_master():
|
||||
err_msg = f"Only master node can accept completion request, please send request to master node: {self.pod_ips[0]}"
|
||||
err_msg = (
|
||||
f"Only master node can accept completion request, please send request to master node: {self.master_ip}"
|
||||
)
|
||||
api_server_logger.error(err_msg)
|
||||
return ErrorResponse(message=err_msg, code=400)
|
||||
if self.models:
|
||||
|
Reference in New Issue
Block a user