mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 16:22:57 +08:00
[Feature] add dealer manager to reuse the connection (#3471)
* [BugFix] fix control signal release failed * [BugFix] fix control signal release failed * update * update * update * [Feature] add dealer manager to reuse the connection * fix * fix * fix * fix * fix * fix * Create test_dealer_connection_manager.py * Delete test/entrypoints/openai directory * Update test_dealer_connection_manager.py * Update test_dealer_connection_manager.py
This commit is contained in:
@@ -20,10 +20,7 @@ import traceback
|
||||
import uuid
|
||||
from typing import List, Optional
|
||||
|
||||
import aiozmq
|
||||
import msgpack
|
||||
import numpy as np
|
||||
from aiozmq import zmq
|
||||
|
||||
from fastdeploy.entrypoints.openai.protocol import (
|
||||
ChatCompletionRequest,
|
||||
@@ -62,6 +59,12 @@ class OpenAIServingChat:
|
||||
else:
|
||||
self.master_ip = self.master_ip.split(",")[0]
|
||||
|
||||
async def _ensure_connection_manager(self):
|
||||
"""ensure connection manager initialized"""
|
||||
if not self.engine_client.connection_initialized:
|
||||
await self.engine_client.connection_manager.initialize()
|
||||
self.engine_client.connection_initialized = True
|
||||
|
||||
def _check_master(self):
|
||||
if self.master_ip is None:
|
||||
return True
|
||||
@@ -180,14 +183,16 @@ class OpenAIServingChat:
|
||||
choices=[],
|
||||
model=model_name,
|
||||
)
|
||||
|
||||
try:
|
||||
dealer = await aiozmq.create_zmq_stream(zmq.DEALER, connect=f"ipc:///dev/shm/router_{self.pid}.ipc")
|
||||
await self._ensure_connection_manager()
|
||||
dealer, response_queue = await self.engine_client.connection_manager.get_connection(request_id)
|
||||
dealer.write([b"", request_id.encode("utf-8")])
|
||||
choices = []
|
||||
current_waiting_time = 0
|
||||
while num_choices > 0:
|
||||
try:
|
||||
raw_data = await asyncio.wait_for(dealer.read(), timeout=10)
|
||||
response = await asyncio.wait_for(response_queue.get(), timeout=10)
|
||||
current_waiting_time = 0
|
||||
except asyncio.TimeoutError:
|
||||
current_waiting_time += 10
|
||||
@@ -202,7 +207,6 @@ class OpenAIServingChat:
|
||||
current_waiting_time = 0
|
||||
await asyncio.sleep(0.01)
|
||||
continue
|
||||
response = msgpack.unpackb(raw_data[-1])
|
||||
for res in response:
|
||||
if res.get("error_code", 200) != 200:
|
||||
raise ValueError("{}".format(res["error_msg"]))
|
||||
@@ -353,9 +357,9 @@ class OpenAIServingChat:
|
||||
)
|
||||
yield f"data: {error_data}\n\n"
|
||||
finally:
|
||||
dealer.close()
|
||||
await self.engine_client.connection_manager.cleanup_request(request_id)
|
||||
self.engine_client.semaphore.release()
|
||||
api_server_logger.info(f"release {self.engine_client.semaphore.status()}")
|
||||
api_server_logger.info(f"release {request_id} {self.engine_client.semaphore.status()}")
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
async def chat_completion_full_generator(
|
||||
@@ -378,7 +382,8 @@ class OpenAIServingChat:
|
||||
include_stop_str_in_output = request.include_stop_str_in_output
|
||||
|
||||
try:
|
||||
dealer = await aiozmq.create_zmq_stream(zmq.DEALER, connect=f"ipc:///dev/shm/router_{self.pid}.ipc")
|
||||
await self._ensure_connection_manager()
|
||||
dealer, response_queue = await self.engine_client.connection_manager.get_connection(request_id)
|
||||
dealer.write([b"", request_id.encode("utf-8")])
|
||||
final_res = None
|
||||
previous_num_tokens = 0
|
||||
@@ -387,7 +392,7 @@ class OpenAIServingChat:
|
||||
completion_token_ids = []
|
||||
while True:
|
||||
try:
|
||||
raw_data = await asyncio.wait_for(dealer.read(), timeout=10)
|
||||
response = await asyncio.wait_for(response_queue.get(), timeout=10)
|
||||
current_waiting_time = 0
|
||||
except asyncio.TimeoutError:
|
||||
current_waiting_time += 10
|
||||
@@ -400,7 +405,6 @@ class OpenAIServingChat:
|
||||
await asyncio.sleep(0.1)
|
||||
continue
|
||||
|
||||
response = msgpack.unpackb(raw_data[-1])
|
||||
task_is_finished = False
|
||||
for data in response:
|
||||
if data.get("error_code", 200) != 200:
|
||||
@@ -430,7 +434,7 @@ class OpenAIServingChat:
|
||||
if task_is_finished:
|
||||
break
|
||||
finally:
|
||||
dealer.close()
|
||||
await self.engine_client.connection_manager.cleanup_request(request_id)
|
||||
self.engine_client.semaphore.release()
|
||||
api_server_logger.info(f"release {self.engine_client.semaphore.status()}")
|
||||
|
||||
|
Reference in New Issue
Block a user