[Feature] add dealer manager to reuse the connection (#3471)

* [BugFix] fix control signal release failed

* [BugFix] fix control signal release failed

* update

* update

* update

* [Feature] add dealer manager to reuse the connection

* fix

* fix

* fix

* fix

* fix

* fix

* Create test_dealer_connection_manager.py

* Delete test/entrypoints/openai directory

* Update test_dealer_connection_manager.py

* Update test_dealer_connection_manager.py
This commit is contained in:
ltd0924
2025-08-21 13:11:13 +08:00
committed by GitHub
parent 985b1265c3
commit 51f68ae593
7 changed files with 360 additions and 25 deletions

View File

@@ -20,10 +20,7 @@ import traceback
import uuid
from typing import List, Optional
import aiozmq
import msgpack
import numpy as np
from aiozmq import zmq
from fastdeploy.engine.request import RequestOutput
from fastdeploy.entrypoints.openai.protocol import (
@@ -53,6 +50,12 @@ class OpenAIServingCompletion:
else:
self.master_ip = self.master_ip.split(",")[0]
async def _ensure_connection_manager(self):
"""ensure connection manager initialized"""
if not self.engine_client.connection_initialized:
await self.engine_client.connection_manager.initialize()
self.engine_client.connection_initialized = True
def _check_master(self):
if self.master_ip is None:
return True
@@ -185,7 +188,10 @@ class OpenAIServingCompletion:
try:
request_ids = [f"{request_id}-{i}" for i in range(num_choices)]
# create dealer
dealer = await aiozmq.create_zmq_stream(zmq.DEALER, connect=f"ipc:///dev/shm/router_{self.pid}.ipc")
await self._ensure_connection_manager()
dealer, response_queue = await self.engine_client.connection_manager.get_connection(
request_id, num_choices
)
for rid in request_ids:
dealer.write([b"", rid.encode("utf-8")])
@@ -198,7 +204,7 @@ class OpenAIServingCompletion:
current_waiting_time = 0
while num_choices > 0:
try:
raw_data = await asyncio.wait_for(dealer.read(), timeout=10)
response = await asyncio.wait_for(response_queue.get(), timeout=10)
current_waiting_time = 0
except asyncio.TimeoutError:
current_waiting_time += 10
@@ -210,7 +216,7 @@ class OpenAIServingCompletion:
current_waiting_time = 0
await asyncio.sleep(0.1)
continue
response = msgpack.unpackb(raw_data[-1])
for data in response:
rid = int(data["request_id"].split("-")[-1])
if data.get("error_code", 200) != 200:
@@ -255,7 +261,7 @@ class OpenAIServingCompletion:
finally:
self.engine_client.semaphore.release()
if dealer is not None:
dealer.close()
await self.engine_client.connection_manager.cleanup_request(request_id)
async def _echo_back_prompt(self, request, res, idx):
if res["outputs"].get("send_idx", -1) == 0 and request.echo:
@@ -288,7 +294,10 @@ class OpenAIServingCompletion:
Process the stream completion request.
"""
try:
dealer = await aiozmq.create_zmq_stream(zmq.DEALER, connect=f"ipc:///dev/shm/router_{self.pid}.ipc")
await self._ensure_connection_manager()
dealer, response_queue = await self.engine_client.connection_manager.get_connection(
request_id, num_choices
)
for i in range(num_choices):
req_id = f"{request_id}-{i}"
@@ -312,7 +321,7 @@ class OpenAIServingCompletion:
current_waiting_time = 0
while num_choices > 0:
try:
raw_data = await asyncio.wait_for(dealer.read(), timeout=10)
response = await asyncio.wait_for(response_queue.get(), timeout=10)
current_waiting_time = 0
except asyncio.TimeoutError:
current_waiting_time += 10
@@ -325,7 +334,6 @@ class OpenAIServingCompletion:
await asyncio.sleep(0.1)
continue
response = msgpack.unpackb(raw_data[-1])
for res in response:
idx = int(res["request_id"].split("-")[-1])
if res.get("error_code", 200) != 200:
@@ -453,9 +461,9 @@ class OpenAIServingCompletion:
yield f"data: {ErrorResponse(message=str(e), code=400).model_dump_json(exclude_unset=True)}\n\n"
finally:
del request
self.engine_client.semaphore.release()
if dealer is not None:
dealer.close()
await self.engine_client.connection_manager.cleanup_request(request_id)
self.engine_client.semaphore.release()
yield "data: [DONE]\n\n"
def request_output_to_completion_response(