mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[BugFix] fix too many open files problem (#3275)
This commit is contained in:
@@ -40,11 +40,12 @@ from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
|
||||
class OpenAIServingCompletion:
|
||||
def __init__(self, engine_client, pid, ips):
|
||||
def __init__(self, engine_client, pid, ips, max_waiting_time):
|
||||
self.engine_client = engine_client
|
||||
self.pid = pid
|
||||
self.master_ip = ips
|
||||
self.host_ip = get_host_ip()
|
||||
self.max_waiting_time = max_waiting_time
|
||||
if self.master_ip is not None:
|
||||
if isinstance(self.master_ip, list):
|
||||
self.master_ip = self.master_ip[0]
|
||||
@@ -114,6 +115,14 @@ class OpenAIServingCompletion:
|
||||
|
||||
del current_req_dict
|
||||
|
||||
try:
|
||||
if self.max_waiting_time < 0:
|
||||
await self.engine_client.semaphore.acquire()
|
||||
else:
|
||||
await asyncio.wait_for(self.engine_client.semaphore.acquire(), timeout=self.max_waiting_time)
|
||||
except Exception:
|
||||
return ErrorResponse(code=408, message=f"Request queued time exceed {self.max_waiting_time}")
|
||||
|
||||
if request.stream:
|
||||
return self.completion_stream_generator(
|
||||
request=request,
|
||||
@@ -223,6 +232,7 @@ class OpenAIServingCompletion:
|
||||
finally:
|
||||
if dealer is not None:
|
||||
dealer.close()
|
||||
self.engine_client.semaphore.release()
|
||||
|
||||
async def completion_stream_generator(
|
||||
self,
|
||||
@@ -372,6 +382,7 @@ class OpenAIServingCompletion:
|
||||
del request
|
||||
if dealer is not None:
|
||||
dealer.close()
|
||||
self.engine_client.semaphore.release()
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
def request_output_to_completion_response(
|
||||
|
Reference in New Issue
Block a user