mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[BugFix] fix too many open files problem (#3256)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* Update cache_messager.py * fix too many open files problem * fix too many open files problem * fix too many open files problem * fix ci bugs * Update api_server.py * add parameter * format * format * format * format * Update parameters.md * Update parameters.md * Update serving_completion.py * Update serving_chat.py * Update envs.py --------- Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -40,11 +40,12 @@ from fastdeploy.worker.output import LogprobsLists
|
||||
|
||||
|
||||
class OpenAIServingCompletion:
|
||||
def __init__(self, engine_client, pid, ips):
|
||||
def __init__(self, engine_client, pid, ips, max_waiting_time):
|
||||
self.engine_client = engine_client
|
||||
self.pid = pid
|
||||
self.master_ip = ips
|
||||
self.host_ip = get_host_ip()
|
||||
self.max_waiting_time = max_waiting_time
|
||||
if self.master_ip is not None:
|
||||
if isinstance(self.master_ip, list):
|
||||
self.master_ip = self.master_ip[0]
|
||||
@@ -114,6 +115,14 @@ class OpenAIServingCompletion:
|
||||
|
||||
del current_req_dict
|
||||
|
||||
try:
|
||||
if self.max_waiting_time < 0:
|
||||
await self.engine_client.semaphore.acquire()
|
||||
else:
|
||||
await asyncio.wait_for(self.engine_client.semaphore.acquire(), timeout=self.max_waiting_time)
|
||||
except Exception:
|
||||
return ErrorResponse(code=408, message=f"Request queued time exceed {self.max_waiting_time}")
|
||||
|
||||
if request.stream:
|
||||
return self.completion_stream_generator(
|
||||
request=request,
|
||||
@@ -221,6 +230,7 @@ class OpenAIServingCompletion:
|
||||
api_server_logger.error(f"Error in completion_full_generator: {e}", exc_info=True)
|
||||
raise
|
||||
finally:
|
||||
self.engine_client.semaphore.release()
|
||||
if dealer is not None:
|
||||
dealer.close()
|
||||
|
||||
@@ -371,6 +381,7 @@ class OpenAIServingCompletion:
|
||||
yield f"data: {ErrorResponse(message=str(e), code=400).model_dump_json(exclude_unset=True)}\n\n"
|
||||
finally:
|
||||
del request
|
||||
self.engine_client.semaphore.release()
|
||||
if dealer is not None:
|
||||
dealer.close()
|
||||
yield "data: [DONE]\n\n"
|
||||
|
Reference in New Issue
Block a user