[Feature] add dealer manager to reuse the connection (#3471)

* [BugFix] fix control signal release failed

* [BugFix] fix control signal release failed

* update

* update

* update

* [Feature] add dealer manager to reuse the connection

* fix

* fix

* fix

* fix

* fix

* fix

* Create test_dealer_connection_manager.py

* Delete test/entrypoints/openai directory

* Update test_dealer_connection_manager.py

* Update test_dealer_connection_manager.py
This commit is contained in:
ltd0924
2025-08-21 13:11:13 +08:00
committed by GitHub
parent 985b1265c3
commit 51f68ae593
7 changed files with 360 additions and 25 deletions

View File

@@ -20,10 +20,7 @@ import traceback
import uuid
from typing import List, Optional
import aiozmq
import msgpack
import numpy as np
from aiozmq import zmq
from fastdeploy.entrypoints.openai.protocol import (
ChatCompletionRequest,
@@ -62,6 +59,12 @@ class OpenAIServingChat:
else:
self.master_ip = self.master_ip.split(",")[0]
async def _ensure_connection_manager(self):
"""ensure connection manager initialized"""
if not self.engine_client.connection_initialized:
await self.engine_client.connection_manager.initialize()
self.engine_client.connection_initialized = True
def _check_master(self):
if self.master_ip is None:
return True
@@ -180,14 +183,16 @@ class OpenAIServingChat:
choices=[],
model=model_name,
)
try:
dealer = await aiozmq.create_zmq_stream(zmq.DEALER, connect=f"ipc:///dev/shm/router_{self.pid}.ipc")
await self._ensure_connection_manager()
dealer, response_queue = await self.engine_client.connection_manager.get_connection(request_id)
dealer.write([b"", request_id.encode("utf-8")])
choices = []
current_waiting_time = 0
while num_choices > 0:
try:
raw_data = await asyncio.wait_for(dealer.read(), timeout=10)
response = await asyncio.wait_for(response_queue.get(), timeout=10)
current_waiting_time = 0
except asyncio.TimeoutError:
current_waiting_time += 10
@@ -202,7 +207,6 @@ class OpenAIServingChat:
current_waiting_time = 0
await asyncio.sleep(0.01)
continue
response = msgpack.unpackb(raw_data[-1])
for res in response:
if res.get("error_code", 200) != 200:
raise ValueError("{}".format(res["error_msg"]))
@@ -353,9 +357,9 @@ class OpenAIServingChat:
)
yield f"data: {error_data}\n\n"
finally:
dealer.close()
await self.engine_client.connection_manager.cleanup_request(request_id)
self.engine_client.semaphore.release()
api_server_logger.info(f"release {self.engine_client.semaphore.status()}")
api_server_logger.info(f"release {request_id} {self.engine_client.semaphore.status()}")
yield "data: [DONE]\n\n"
async def chat_completion_full_generator(
@@ -378,7 +382,8 @@ class OpenAIServingChat:
include_stop_str_in_output = request.include_stop_str_in_output
try:
dealer = await aiozmq.create_zmq_stream(zmq.DEALER, connect=f"ipc:///dev/shm/router_{self.pid}.ipc")
await self._ensure_connection_manager()
dealer, response_queue = await self.engine_client.connection_manager.get_connection(request_id)
dealer.write([b"", request_id.encode("utf-8")])
final_res = None
previous_num_tokens = 0
@@ -387,7 +392,7 @@ class OpenAIServingChat:
completion_token_ids = []
while True:
try:
raw_data = await asyncio.wait_for(dealer.read(), timeout=10)
response = await asyncio.wait_for(response_queue.get(), timeout=10)
current_waiting_time = 0
except asyncio.TimeoutError:
current_waiting_time += 10
@@ -400,7 +405,6 @@ class OpenAIServingChat:
await asyncio.sleep(0.1)
continue
response = msgpack.unpackb(raw_data[-1])
task_is_finished = False
for data in response:
if data.get("error_code", 200) != 200:
@@ -430,7 +434,7 @@ class OpenAIServingChat:
if task_is_finished:
break
finally:
dealer.close()
await self.engine_client.connection_manager.cleanup_request(request_id)
self.engine_client.semaphore.release()
api_server_logger.info(f"release {self.engine_client.semaphore.status()}")