[Feature] add cli command serve (#4226)

This commit is contained in:
memoryCoderC
2025-09-24 14:50:45 +08:00
committed by GitHub
parent 9566ae8827
commit 8b0ce8e3ab
12 changed files with 232 additions and 36 deletions

View File

@@ -22,7 +22,8 @@ import aiozmq
import msgpack
import zmq
from fastdeploy.utils import api_server_logger
from fastdeploy.engine.args_utils import EngineArgs
from fastdeploy.utils import FlexibleArgumentParser, api_server_logger
UVICORN_CONFIG = {
"version": 1,
@@ -201,3 +202,31 @@ class DealerConnectionManager:
self.request_map.clear()
api_server_logger.info("All connections and tasks closed")
def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
    """Register the API server command-line options on ``parser``.

    The server-level flags listed below are added first, then the parser is
    handed to ``EngineArgs.add_cli_args`` and whatever that call returns is
    returned to the caller.

    Args:
        parser: Parser to extend with the server options.

    Returns:
        The parser returned by ``EngineArgs.add_cli_args``.
    """
    # One (flag, add_argument keyword arguments) entry per option, kept in
    # the order in which the options are registered on the parser.
    server_options = (
        ("--port", {"default": 8000, "type": int, "help": "port to the http server"}),
        ("--host", {"default": "0.0.0.0", "type": str, "help": "host to the http server"}),
        ("--workers", {"default": 1, "type": int, "help": "number of workers"}),
        ("--metrics-port", {"default": 8001, "type": int, "help": "port for metrics server"}),
        ("--controller-port", {"default": -1, "type": int, "help": "port for controller server"}),
        (
            "--max-waiting-time",
            {
                "default": -1,
                "type": int,
                "help": "max waiting time for connection, if set value -1 means no waiting time limit",
            },
        ),
        ("--max-concurrency", {"default": 512, "type": int, "help": "max concurrency"}),
        (
            "--enable-mm-output",
            {
                "action": "store_true",
                "help": "Enable 'multimodal_content' field in response output. ",
            },
        ),
        (
            "--timeout-graceful-shutdown",
            {
                "default": 0,
                "type": int,
                "help": "timeout for graceful shutdown in seconds (used by uvicorn)",
            },
        ),
    )
    for flag, options in server_options:
        parser.add_argument(flag, **options)

    # Engine-specific options are appended after the server options.
    return EngineArgs.add_cli_args(parser)