[BugFix] fix too many open files problem (#3275)

This commit is contained in:
ltd0924
2025-08-08 20:11:32 +08:00
committed by GitHub
parent 1b6f482c15
commit 6706ccb37e
6 changed files with 177 additions and 22 deletions

View File

@@ -40,11 +40,12 @@ from fastdeploy.worker.output import LogprobsLists
class OpenAIServingCompletion:
def __init__(self, engine_client, pid, ips):
def __init__(self, engine_client, pid, ips, max_waiting_time):
self.engine_client = engine_client
self.pid = pid
self.master_ip = ips
self.host_ip = get_host_ip()
self.max_waiting_time = max_waiting_time
if self.master_ip is not None:
if isinstance(self.master_ip, list):
self.master_ip = self.master_ip[0]
@@ -114,6 +115,14 @@ class OpenAIServingCompletion:
del current_req_dict
try:
if self.max_waiting_time < 0:
await self.engine_client.semaphore.acquire()
else:
await asyncio.wait_for(self.engine_client.semaphore.acquire(), timeout=self.max_waiting_time)
except Exception:
return ErrorResponse(code=408, message=f"Request queued time exceed {self.max_waiting_time}")
if request.stream:
return self.completion_stream_generator(
request=request,
@@ -223,6 +232,7 @@ class OpenAIServingCompletion:
finally:
if dealer is not None:
dealer.close()
self.engine_client.semaphore.release()
async def completion_stream_generator(
self,
@@ -372,6 +382,7 @@ class OpenAIServingCompletion:
del request
if dealer is not None:
dealer.close()
self.engine_client.semaphore.release()
yield "data: [DONE]\n\n"
def request_output_to_completion_response(