[BugFix] fix control signal release failed (#3390)

* [BugFix] fix control signal release failed

* [BugFix] fix control signal release failed

* update

* update

* update
This commit is contained in:
ltd0924
2025-08-19 13:51:38 +08:00
committed by GitHub
parent 8b12c80f90
commit bca8905b40
4 changed files with 58 additions and 48 deletions

View File

@@ -175,10 +175,10 @@ async def connection_manager():
await asyncio.wait_for(connection_semaphore.acquire(), timeout=0.001)
yield
except asyncio.TimeoutError:
api_server_logger.info(f"Reach max request release: {connection_semaphore.status()}")
if connection_semaphore.locked():
connection_semaphore.release()
raise HTTPException(status_code=429, detail="Too many requests")
api_server_logger.info(f"Reach max request concurrency, semaphore status: {connection_semaphore.status()}")
raise HTTPException(
status_code=429, detail=f"Too many requests,current max concurrency is {args.max_concurrency}"
)
# TODO 传递真实引擎值 通过pid 获取状态
@@ -265,9 +265,11 @@ async def create_chat_completion(request: ChatCompletionRequest):
inject_to_metadata(request)
generator = await app.state.chat_handler.create_chat_completion(request)
if isinstance(generator, ErrorResponse):
api_server_logger.debug(f"release: {connection_semaphore.status()}")
connection_semaphore.release()
return JSONResponse(content={"detail": generator.model_dump()}, status_code=generator.code)
elif isinstance(generator, ChatCompletionResponse):
api_server_logger.debug(f"release: {connection_semaphore.status()}")
connection_semaphore.release()
return JSONResponse(content=generator.model_dump())
else: