mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
[Feature] Setting number of apiserver workers automatically (#3794)
Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
This commit is contained in:
@@ -589,7 +589,7 @@ class EngineSevice:
|
|||||||
else:
|
else:
|
||||||
err, data = self.zmq_server.receive_pyobj_once(block)
|
err, data = self.zmq_server.receive_pyobj_once(block)
|
||||||
if err is not None:
|
if err is not None:
|
||||||
llm_logger.error("Engine stops inserting zmq task into scheduler, err:{err}")
|
llm_logger.error(f"Engine stops inserting zmq task into scheduler, err:{err}")
|
||||||
break
|
break
|
||||||
|
|
||||||
request, insert_task = None, []
|
request, insert_task = None, []
|
||||||
|
@@ -60,6 +60,7 @@ from fastdeploy.utils import (
|
|||||||
StatefulSemaphore,
|
StatefulSemaphore,
|
||||||
api_server_logger,
|
api_server_logger,
|
||||||
console_logger,
|
console_logger,
|
||||||
|
is_package_installed,
|
||||||
is_port_available,
|
is_port_available,
|
||||||
retrive_model_from_server,
|
retrive_model_from_server,
|
||||||
)
|
)
|
||||||
@@ -67,7 +68,7 @@ from fastdeploy.utils import (
|
|||||||
parser = FlexibleArgumentParser()
|
parser = FlexibleArgumentParser()
|
||||||
parser.add_argument("--port", default=8000, type=int, help="port to the http server")
|
parser.add_argument("--port", default=8000, type=int, help="port to the http server")
|
||||||
parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
|
parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
|
||||||
parser.add_argument("--workers", default=1, type=int, help="number of workers")
|
parser.add_argument("--workers", default=None, type=int, help="number of workers")
|
||||||
parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server")
|
parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server")
|
||||||
parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
|
parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -82,6 +83,16 @@ parser.add_argument(
|
|||||||
)
|
)
|
||||||
parser = EngineArgs.add_cli_args(parser)
|
parser = EngineArgs.add_cli_args(parser)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
if args.workers is None:
|
||||||
|
# On GPU builds, the number of uvicorn workers is derived from the `max-num-seqs` parameter
|
||||||
|
if is_package_installed("paddlepaddle-gpu"):
|
||||||
|
args.workers = max(min(int(args.max_num_seqs // 32), 8), 1)
|
||||||
|
else:
|
||||||
|
args.workers = 1
|
||||||
|
console_logger.info(f"Number of api-server workers: {args.workers}.")
|
||||||
|
|
||||||
args.model = retrive_model_from_server(args.model, args.revision)
|
args.model = retrive_model_from_server(args.model, args.revision)
|
||||||
chat_template = load_chat_template(args.chat_template, args.model)
|
chat_template = load_chat_template(args.chat_template, args.model)
|
||||||
if args.tool_parser_plugin:
|
if args.tool_parser_plugin:
|
||||||
|
@@ -27,6 +27,7 @@ import sys
|
|||||||
import tarfile
|
import tarfile
|
||||||
import time
|
import time
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from importlib.metadata import PackageNotFoundError, distribution
|
||||||
from logging.handlers import BaseRotatingHandler
|
from logging.handlers import BaseRotatingHandler
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Literal, TypeVar, Union
|
from typing import Literal, TypeVar, Union
|
||||||
@@ -668,6 +669,14 @@ def import_from_path(module_name: str, file_path: Union[str, os.PathLike]):
|
|||||||
return module
|
return module
|
||||||
|
|
||||||
|
|
||||||
|
def is_package_installed(package_name):
    """
    Report whether a distribution named ``package_name`` is installed.

    Args:
        package_name: Distribution name to look up with
            ``importlib.metadata.distribution`` (for example
            ``"paddlepaddle-gpu"``).

    Returns:
        True when distribution metadata is found for the name; False when
        the name is empty/None or no matching distribution is found.
    """
    # An empty name cannot identify a distribution. Newer importlib.metadata
    # versions reject it with ValueError instead of PackageNotFoundError, so
    # answer directly rather than letting that exception escape to callers.
    if not package_name:
        return False
    try:
        distribution(package_name)
    except PackageNotFoundError:
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
def version():
|
def version():
|
||||||
"""
|
"""
|
||||||
Prints the contents of the version.txt file located in the parent directory of this script.
|
Prints the contents of the version.txt file located in the parent directory of this script.
|
||||||
|
Reference in New Issue
Block a user