mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[Sync Code] Update vs branch (#3403)
* Pre ce modified (#3335) (#3360)
* Pre ce modified (#3335)
* update
* update
* fix
* fix
* update
* update
* update
* fix
* update
* update
* update
* add ut fix pr(3367)
* [Bug Fix] Fix V1 video bug (#3387)
* fix stopseq error info (#3342)
Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
* [BugFix] Fix default log level of paddleformers (#3377)
Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
* [Polish Code] Remove useless notes
* feat(log):add_request_and_response_log (#3392)
* Optimize CI execution workflow. (#3371) (#3384)
* fix
* [BugFix] fix control signal release failed (#3374)
* [BugFix]
* [BugFix]
* [BugFix]
* [BugFix]
* fix
* fix
---------
Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
---------
Co-authored-by: YUNSHEN XIE <1084314248@qq.com>
Co-authored-by: ming1753 <61511741+ming1753@users.noreply.github.com>
Co-authored-by: JYChen <zoooo0820@qq.com>
Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
Co-authored-by: xiaolei373 <zley373@gmail.com>
Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
This commit is contained in:
@@ -168,9 +168,9 @@ async def connection_manager():
|
||||
yield
|
||||
except asyncio.TimeoutError:
|
||||
api_server_logger.info(f"Reach max request release: {connection_semaphore.status()}")
|
||||
if connection_semaphore.locked():
|
||||
connection_semaphore.release()
|
||||
raise HTTPException(status_code=429, detail="Too many requests")
|
||||
raise HTTPException(
|
||||
status_code=429, detail=f"Too many requests, current max concurrency is {args.max_concurrency}"
|
||||
)
|
||||
|
||||
|
||||
def wrap_streaming_generator(original_generator: AsyncGenerator):
|
||||
@@ -183,7 +183,7 @@ def wrap_streaming_generator(original_generator: AsyncGenerator):
|
||||
async for chunk in original_generator:
|
||||
yield chunk
|
||||
finally:
|
||||
api_server_logger.debug(f"release: {connection_semaphore.status()}")
|
||||
api_server_logger.debug(f"current concurrency status: {connection_semaphore.status()}")
|
||||
connection_semaphore.release()
|
||||
|
||||
return wrapped_generator
|
||||
@@ -247,6 +247,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
||||
"""
|
||||
Create a chat completion for the provided prompt and parameters.
|
||||
"""
|
||||
api_server_logger.info(f"Chat Received request: {request.model_dump_json()}")
|
||||
if app.state.dynamic_load_weight:
|
||||
status, msg = app.state.engine_client.is_workers_alive()
|
||||
if not status:
|
||||
@@ -257,9 +258,11 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
||||
generator = await app.state.chat_handler.create_chat_completion(request)
|
||||
if isinstance(generator, ErrorResponse):
|
||||
connection_semaphore.release()
|
||||
api_server_logger.debug(f"current concurrency status: {connection_semaphore.status()}")
|
||||
return JSONResponse(content={"detail": generator.model_dump()}, status_code=generator.code)
|
||||
elif isinstance(generator, ChatCompletionResponse):
|
||||
connection_semaphore.release()
|
||||
api_server_logger.debug(f"current concurrency status: {connection_semaphore.status()}")
|
||||
return JSONResponse(content=generator.model_dump())
|
||||
else:
|
||||
wrapped_generator = wrap_streaming_generator(generator)
|
||||
@@ -275,6 +278,7 @@ async def create_completion(request: CompletionRequest):
|
||||
"""
|
||||
Create a completion for the provided prompt and parameters.
|
||||
"""
|
||||
api_server_logger.info(f"Completion Received request: {request.model_dump_json()}")
|
||||
if app.state.dynamic_load_weight:
|
||||
status, msg = app.state.engine_client.is_workers_alive()
|
||||
if not status:
|
||||
|
Reference in New Issue
Block a user