Automatically configure workers based on max-num-seqs (#3846)

Automatically configure workers based on max-num-seqs
This commit is contained in:
yyssys
2025-09-03 21:12:42 +08:00
committed by GitHub
parent 7dbd9412b0
commit abde903813

View File

@@ -60,7 +60,6 @@ from fastdeploy.utils import (
StatefulSemaphore,
api_server_logger,
console_logger,
is_package_installed,
is_port_available,
retrive_model_from_server,
)
@@ -85,11 +84,8 @@ parser = EngineArgs.add_cli_args(parser)
args = parser.parse_args()
if args.workers is None:
# The number of uvicorn workers is derived from `max-num-seqs`: one worker per 32 sequences, clamped to the range 1-8
if is_package_installed("paddlepaddle-gpu"):
args.workers = max(min(int(args.max_num_seqs // 32), 8), 1)
else:
args.workers = 1
args.workers = max(min(int(args.max_num_seqs // 32), 8), 1)
console_logger.info(f"Number of api-server workers: {args.workers}.")
args.model = retrive_model_from_server(args.model, args.revision)