diff --git a/fastdeploy/entrypoints/openai/api_server.py b/fastdeploy/entrypoints/openai/api_server.py index da0e8bfc6..02962278f 100644 --- a/fastdeploy/entrypoints/openai/api_server.py +++ b/fastdeploy/entrypoints/openai/api_server.py @@ -182,6 +182,7 @@ async def lifespan(app: FastAPI): workers=args.workers, tool_parser=args.tool_call_parser, ) + await engine_client.connection_manager.initialize() app.state.dynamic_load_weight = args.dynamic_load_weight model_handler = OpenAIServingModels( model_paths, diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py index 440f0571e..d8356ce52 100644 --- a/fastdeploy/entrypoints/openai/serving_chat.py +++ b/fastdeploy/entrypoints/openai/serving_chat.py @@ -74,12 +74,6 @@ class OpenAIServingChat: self.master_ip = "0.0.0.0" api_server_logger.info(f"master ip: {self.master_ip}") - async def _ensure_connection_manager(self): - """ensure connection manager initialized""" - if not self.engine_client.connection_initialized: - await self.engine_client.connection_manager.initialize() - self.engine_client.connection_initialized = True - def _check_master(self): return self.engine_client.is_master @@ -206,7 +200,6 @@ class OpenAIServingChat: api_server_logger.info(f"create chat completion request: {request_id}") try: - await self._ensure_connection_manager() dealer, response_queue = await self.engine_client.connection_manager.get_connection(request_id) dealer.write([b"", request_id.encode("utf-8")]) choices = [] @@ -419,7 +412,6 @@ class OpenAIServingChat: include_stop_str_in_output = request.include_stop_str_in_output try: - await self._ensure_connection_manager() dealer, response_queue = await self.engine_client.connection_manager.get_connection(request_id) dealer.write([b"", request_id.encode("utf-8")]) final_res = None diff --git a/fastdeploy/entrypoints/openai/serving_completion.py b/fastdeploy/entrypoints/openai/serving_completion.py index 7eb322535..ba81afc35 100644 --- a/fastdeploy/entrypoints/openai/serving_completion.py +++ b/fastdeploy/entrypoints/openai/serving_completion.py @@ -51,12 +51,6 @@ class OpenAIServingCompletion: else: self.master_ip = "0.0.0.0" - async def _ensure_connection_manager(self): - """ensure connection manager initialized""" - if not self.engine_client.connection_initialized: - await self.engine_client.connection_manager.initialize() - self.engine_client.connection_initialized = True - def _check_master(self): return self.engine_client.is_master @@ -208,7 +202,6 @@ class OpenAIServingCompletion: try: request_ids = [f"{request_id}-{i}" for i in range(num_choices)] # create dealer - await self._ensure_connection_manager() dealer, response_queue = await self.engine_client.connection_manager.get_connection( request_id, num_choices ) @@ -314,7 +307,6 @@ class OpenAIServingCompletion: Process the stream completion request. """ try: - await self._ensure_connection_manager() dealer, response_queue = await self.engine_client.connection_manager.get_connection( request_id, num_choices )