Automatically configure workers based on max-num-seqs (#3846)

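Automatically configure the number of api-server workers based on max-num-seqs. When `--workers` is not given, the count is now always computed as max(min(max_num_seqs // 32, 8), 1); previously this applied only when `paddlepaddle-gpu` was installed, and other installs defaulted to 1 worker. The now-unused `is_package_installed` import is removed.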
2025-10-05 16:48:03 +08:00 · 2025-09-03 21:12:42 +08:00
parent 7dbd9412b0
commit abde903813
1 changed file with 2 additions and 6 deletions
--- a/fastdeploy/entrypoints/openai/api_server.py
+++ b/fastdeploy/entrypoints/openai/api_server.py
@@ -60,7 +60,6 @@ from fastdeploy.utils import (
    StatefulSemaphore,
    api_server_logger,
    console_logger,
-    is_package_installed,
    is_port_available,
    retrive_model_from_server,
 )
@@ -85,11 +84,8 @@ parser = EngineArgs.add_cli_args(parser)
 args = parser.parse_args()

 if args.workers is None:
-    # In GPU, the workers of uvicorn will be set according to the parameter `max-num-seqs`
-    if is_package_installed("paddlepaddle-gpu"):
-        args.workers = max(min(int(args.max_num_seqs // 32), 8), 1)
-    else:
-        args.workers = 1
+    args.workers = max(min(int(args.max_num_seqs // 32), 8), 1)
+
 console_logger.info(f"Number of api-server workers: {args.workers}.")

 args.model = retrive_model_from_server(args.model, args.revision)