diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py
index 62442d8cb..41c12a2d6 100644
--- a/fastdeploy/engine/common_engine.py
+++ b/fastdeploy/engine/common_engine.py
@@ -589,7 +589,7 @@ class EngineSevice:
                 else:
                     err, data = self.zmq_server.receive_pyobj_once(block)
                 if err is not None:
-                    llm_logger.error("Engine stops inserting zmq task into scheduler, err:{err}")
+                    llm_logger.error(f"Engine stops inserting zmq task into scheduler, err:{err}")
                     break
 
                 request, insert_task = None, []
diff --git a/fastdeploy/entrypoints/openai/api_server.py b/fastdeploy/entrypoints/openai/api_server.py
index 4764ad9c7..16d6e735b 100644
--- a/fastdeploy/entrypoints/openai/api_server.py
+++ b/fastdeploy/entrypoints/openai/api_server.py
@@ -60,6 +60,7 @@ from fastdeploy.utils import (
     StatefulSemaphore,
     api_server_logger,
     console_logger,
+    is_package_installed,
     is_port_available,
     retrive_model_from_server,
 )
@@ -67,7 +68,7 @@ from fastdeploy.utils import (
 parser = FlexibleArgumentParser()
 parser.add_argument("--port", default=8000, type=int, help="port to the http server")
 parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
-parser.add_argument("--workers", default=1, type=int, help="number of workers")
+parser.add_argument("--workers", default=None, type=int, help="number of workers")
 parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server")
 parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
 parser.add_argument(
@@ -82,6 +83,15 @@ parser.add_argument(
 )
 parser = EngineArgs.add_cli_args(parser)
 args = parser.parse_args()
+
+if args.workers is None:
+    # On GPU, derive the uvicorn worker count from `max-num-seqs`: one per 32 sequences, clamped to [1, 8]
+    if is_package_installed("paddlepaddle-gpu"):
+        args.workers = max(min(int(args.max_num_seqs // 32), 8), 1)
+    else:
+        args.workers = 1
+console_logger.info(f"Number of api-server workers: {args.workers}.")
+
 args.model = retrive_model_from_server(args.model, args.revision)
 chat_template = load_chat_template(args.chat_template, args.model)
 if args.tool_parser_plugin:
diff --git a/fastdeploy/utils.py b/fastdeploy/utils.py
index ecefd87af..2d939fa41 100644
--- a/fastdeploy/utils.py
+++ b/fastdeploy/utils.py
@@ -27,6 +27,7 @@ import sys
 import tarfile
 import time
 from datetime import datetime
+from importlib.metadata import PackageNotFoundError, distribution
 from logging.handlers import BaseRotatingHandler
 from pathlib import Path
 from typing import Literal, TypeVar, Union
@@ -668,6 +669,24 @@ def import_from_path(module_name: str, file_path: Union[str, os.PathLike]):
     return module
 
 
+def is_package_installed(package_name):
+    """
+    Check whether a distribution package is installed in the current environment.
+
+    Args:
+        package_name (str): Distribution name as published (e.g. "paddlepaddle-gpu"),
+            which may differ from the importable module name.
+
+    Returns:
+        bool: True if metadata for the distribution is found, False otherwise.
+    """
+    try:
+        distribution(package_name)
+    except PackageNotFoundError:
+        return False
+    return True
+
+
 def version():
     """
     Prints the contents of the version.txt file located in the parent directory of this script.