mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[Feature] add dealer manager to reuse the connection (#3471)
* [BugFix] fix control signal release failed * [BugFix] fix control signal release failed * update * update * update * [Feature] add dealer manager to reuse the connection * fix * fix * fix * fix * fix * fix * Create test_dealer_connection_manager.py * Delete test/entrypoints/openai directory * Update test_dealer_connection_manager.py * Update test_dealer_connection_manager.py
This commit is contained in:
@@ -20,10 +20,7 @@ import traceback
|
||||
import uuid
|
||||
from typing import List, Optional
|
||||
|
||||
import aiozmq
|
||||
import msgpack
|
||||
import numpy as np
|
||||
from aiozmq import zmq
|
||||
|
||||
from fastdeploy.engine.request import RequestOutput
|
||||
from fastdeploy.entrypoints.openai.protocol import (
|
||||
@@ -53,6 +50,12 @@ class OpenAIServingCompletion:
|
||||
else:
|
||||
self.master_ip = self.master_ip.split(",")[0]
|
||||
|
||||
async def _ensure_connection_manager(self):
|
||||
"""ensure connection manager initialized"""
|
||||
if not self.engine_client.connection_initialized:
|
||||
await self.engine_client.connection_manager.initialize()
|
||||
self.engine_client.connection_initialized = True
|
||||
|
||||
def _check_master(self):
|
||||
if self.master_ip is None:
|
||||
return True
|
||||
@@ -185,7 +188,10 @@ class OpenAIServingCompletion:
|
||||
try:
|
||||
request_ids = [f"{request_id}-{i}" for i in range(num_choices)]
|
||||
# create dealer
|
||||
dealer = await aiozmq.create_zmq_stream(zmq.DEALER, connect=f"ipc:///dev/shm/router_{self.pid}.ipc")
|
||||
await self._ensure_connection_manager()
|
||||
dealer, response_queue = await self.engine_client.connection_manager.get_connection(
|
||||
request_id, num_choices
|
||||
)
|
||||
|
||||
for rid in request_ids:
|
||||
dealer.write([b"", rid.encode("utf-8")])
|
||||
@@ -198,7 +204,7 @@ class OpenAIServingCompletion:
|
||||
current_waiting_time = 0
|
||||
while num_choices > 0:
|
||||
try:
|
||||
raw_data = await asyncio.wait_for(dealer.read(), timeout=10)
|
||||
response = await asyncio.wait_for(response_queue.get(), timeout=10)
|
||||
current_waiting_time = 0
|
||||
except asyncio.TimeoutError:
|
||||
current_waiting_time += 10
|
||||
@@ -210,7 +216,7 @@ class OpenAIServingCompletion:
|
||||
current_waiting_time = 0
|
||||
await asyncio.sleep(0.1)
|
||||
continue
|
||||
response = msgpack.unpackb(raw_data[-1])
|
||||
|
||||
for data in response:
|
||||
rid = int(data["request_id"].split("-")[-1])
|
||||
if data.get("error_code", 200) != 200:
|
||||
@@ -255,7 +261,7 @@ class OpenAIServingCompletion:
|
||||
finally:
|
||||
self.engine_client.semaphore.release()
|
||||
if dealer is not None:
|
||||
dealer.close()
|
||||
await self.engine_client.connection_manager.cleanup_request(request_id)
|
||||
|
||||
async def _echo_back_prompt(self, request, res, idx):
|
||||
if res["outputs"].get("send_idx", -1) == 0 and request.echo:
|
||||
@@ -288,7 +294,10 @@ class OpenAIServingCompletion:
|
||||
Process the stream completion request.
|
||||
"""
|
||||
try:
|
||||
dealer = await aiozmq.create_zmq_stream(zmq.DEALER, connect=f"ipc:///dev/shm/router_{self.pid}.ipc")
|
||||
await self._ensure_connection_manager()
|
||||
dealer, response_queue = await self.engine_client.connection_manager.get_connection(
|
||||
request_id, num_choices
|
||||
)
|
||||
|
||||
for i in range(num_choices):
|
||||
req_id = f"{request_id}-{i}"
|
||||
@@ -312,7 +321,7 @@ class OpenAIServingCompletion:
|
||||
current_waiting_time = 0
|
||||
while num_choices > 0:
|
||||
try:
|
||||
raw_data = await asyncio.wait_for(dealer.read(), timeout=10)
|
||||
response = await asyncio.wait_for(response_queue.get(), timeout=10)
|
||||
current_waiting_time = 0
|
||||
except asyncio.TimeoutError:
|
||||
current_waiting_time += 10
|
||||
@@ -325,7 +334,6 @@ class OpenAIServingCompletion:
|
||||
await asyncio.sleep(0.1)
|
||||
continue
|
||||
|
||||
response = msgpack.unpackb(raw_data[-1])
|
||||
for res in response:
|
||||
idx = int(res["request_id"].split("-")[-1])
|
||||
if res.get("error_code", 200) != 200:
|
||||
@@ -453,9 +461,9 @@ class OpenAIServingCompletion:
|
||||
yield f"data: {ErrorResponse(message=str(e), code=400).model_dump_json(exclude_unset=True)}\n\n"
|
||||
finally:
|
||||
del request
|
||||
self.engine_client.semaphore.release()
|
||||
if dealer is not None:
|
||||
dealer.close()
|
||||
await self.engine_client.connection_manager.cleanup_request(request_id)
|
||||
self.engine_client.semaphore.release()
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
def request_output_to_completion_response(
|
||||
|
Reference in New Issue
Block a user