[Feature] add cli command serve (#4226)

This commit is contained in:
memoryCoderC
2025-09-24 14:50:45 +08:00
committed by GitHub
parent 9566ae8827
commit 8b0ce8e3ab
12 changed files with 232 additions and 36 deletions

View File

@@ -28,7 +28,7 @@ from paddleformers.utils.log import logger as pf_logger
from fastdeploy.engine.sampling_params import SamplingParams from fastdeploy.engine.sampling_params import SamplingParams
from fastdeploy.entrypoints.llm import LLM from fastdeploy.entrypoints.llm import LLM
from fastdeploy.utils import envs from fastdeploy.utils import current_package_version, envs
if envs.FD_DEBUG != "1": if envs.FD_DEBUG != "1":
import logging import logging
@@ -43,6 +43,8 @@ except ImportError:
pass pass
# TODO(tangbinhan): remove this code # TODO(tangbinhan): remove this code
__version__ = current_package_version()
def _patch_fastsafetensors(): def _patch_fastsafetensors():
try: try:

View File

@@ -17,17 +17,19 @@
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/main.py # This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/main.py
from __future__ import annotations from __future__ import annotations
import importlib.metadata from fastdeploy import __version__
def main(): def main():
import fastdeploy.entrypoints.cli.benchmark.main import fastdeploy.entrypoints.cli.benchmark.main
import fastdeploy.entrypoints.cli.openai import fastdeploy.entrypoints.cli.openai
import fastdeploy.entrypoints.cli.serve
from fastdeploy.utils import FlexibleArgumentParser from fastdeploy.utils import FlexibleArgumentParser
CMD_MODULES = [ CMD_MODULES = [
fastdeploy.entrypoints.cli.openai, fastdeploy.entrypoints.cli.openai,
fastdeploy.entrypoints.cli.benchmark.main, fastdeploy.entrypoints.cli.benchmark.main,
fastdeploy.entrypoints.cli.serve,
] ]
parser = FlexibleArgumentParser(description="FastDeploy CLI") parser = FlexibleArgumentParser(description="FastDeploy CLI")
@@ -35,7 +37,7 @@ def main():
"-v", "-v",
"--version", "--version",
action="version", action="version",
version=importlib.metadata.version("fastdeploy-gpu"), version=__version__,
) )
subparsers = parser.add_subparsers(required=False, dest="subparser") subparsers = parser.add_subparsers(required=False, dest="subparser")
cmds = {} cmds = {}

View File

@@ -86,7 +86,7 @@ def _add_query_options(parser: FlexibleArgumentParser) -> FlexibleArgumentParser
parser.add_argument( parser.add_argument(
"--url", "--url",
type=str, type=str,
default="http://localhost:9904/v1", default="http://localhost:8000/v1",
help="url of the running OpenAI-Compatible RESTful API server", help="url of the running OpenAI-Compatible RESTful API server",
) )
parser.add_argument( parser.add_argument(

View File

@@ -0,0 +1,84 @@
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/serve.py
import argparse
import atexit
import os
import signal
import subprocess
import sys
from fastdeploy.entrypoints.cli.types import CLISubcommand
from fastdeploy.entrypoints.openai.utils import make_arg_parser
from fastdeploy.utils import FlexibleArgumentParser
class ServeSubcommand(CLISubcommand):
    """The `serve` subcommand for the fastdeploy CLI."""

    name = "serve"

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        """Run the OpenAI-compatible API server in a child process and wait for it.

        The child is started with the current interpreter as
        ``python -m fastdeploy.entrypoints.openai.api_server`` followed by
        ``sys.argv[2:]`` (everything after ``fastdeploy serve``), and inherits
        a copy of the current environment.

        On SIGINT or SIGTERM the child is sent a terminate request and the
        parent waits a bounded time for it to exit before exiting with
        status 0 itself.

        Args:
            args: Parsed CLI namespace. It is not read here; the child
                re-parses the raw command-line tokens.
        """
        env = os.environ.copy()
        cmd = [
            sys.executable,
            "-m",
            "fastdeploy.entrypoints.openai.api_server",
            *sys.argv[2:],
        ]

        # Start the server as a child process.
        proc = subprocess.Popen(cmd, env=env)
        print(f"Starting server (PID: {proc.pid})")

        # Upper bound, in seconds, on how long the parent waits for the child
        # to exit after asking it to terminate.
        shutdown_grace_seconds = 30

        def cleanup():
            """Terminate the child if it is still running and give it time to exit."""
            if proc.poll() is not None:
                # Child already exited; nothing to do.
                return
            print(f"\nTerminating child process (PID: {proc.pid})...")
            proc.terminate()
            try:
                # Wait so the parent does not exit while the server is still
                # shutting down. The child is deliberately not force-killed.
                proc.wait(timeout=shutdown_grace_seconds)
            except subprocess.TimeoutExpired:
                print(f"Child process (PID: {proc.pid}) is still running after {shutdown_grace_seconds}s.")

        # Fallback for exits that bypass the try/finally below.
        atexit.register(cleanup)

        def signal_handler(signum, frame):
            # Only unwind here. The blocking proc.wait() below is still on the
            # stack while this handler runs, so waiting on the child from the
            # handler would contend with it; the finally block does the
            # termination once that call has been unwound.
            sys.exit(0)

        # Handle SIGINT (Ctrl+C) and SIGTERM.
        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        # Block until the child exits, then make sure it is not left running.
        try:
            proc.wait()
        finally:
            cleanup()

    def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        """Register the `serve` sub-parser on ``subparsers`` and return it.

        The sub-parser gets the options added by ``make_arg_parser`` plus a
        ``--config`` option.
        """
        serve_parser = subparsers.add_parser(
            name=self.name,
            help="Start the FastDeploy OpenAI Compatible API server.",
            description="Start the FastDeploy OpenAI Compatible API server.",
            usage="fastdeploy serve [model_tag] [options]",
        )
        serve_parser = make_arg_parser(serve_parser)
        serve_parser.add_argument("--config", help="Read CLI options from a config file. Must be a YAML file")
        return serve_parser
def cmd_init() -> list[CLISubcommand]:
    """Return the subcommands contributed by this module: one ``ServeSubcommand``."""
    serve_command = ServeSubcommand()
    return [serve_command]

View File

@@ -49,7 +49,7 @@ from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
from fastdeploy.entrypoints.openai.serving_completion import OpenAIServingCompletion from fastdeploy.entrypoints.openai.serving_completion import OpenAIServingCompletion
from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels
from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser
from fastdeploy.metrics.metrics import ( from fastdeploy.metrics.metrics import (
EXCLUDE_LABELS, EXCLUDE_LABELS,
cleanup_prometheus_files, cleanup_prometheus_files,
@@ -67,31 +67,7 @@ from fastdeploy.utils import (
retrive_model_from_server, retrive_model_from_server,
) )
parser = FlexibleArgumentParser() parser = make_arg_parser(FlexibleArgumentParser())
parser.add_argument("--port", default=8000, type=int, help="port to the http server")
parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
parser.add_argument("--workers", default=1, type=int, help="number of workers")
parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server")
parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
parser.add_argument(
"--max-waiting-time",
default=-1,
type=int,
help="max waiting time for connection, if set value -1 means no waiting time limit",
)
parser.add_argument("--max-concurrency", default=512, type=int, help="max concurrency")
parser.add_argument(
"--enable-mm-output", action="store_true", help="Enable 'multimodal_content' field in response output. "
)
parser.add_argument(
"--timeout-graceful-shutdown",
default=0,
type=int,
help="timeout for graceful shutdown in seconds (used by uvicorn)",
)
parser = EngineArgs.add_cli_args(parser)
args = parser.parse_args() args = parser.parse_args()
console_logger.info(f"Number of api-server workers: {args.workers}.") console_logger.info(f"Number of api-server workers: {args.workers}.")

View File

@@ -22,7 +22,8 @@ import aiozmq
import msgpack import msgpack
import zmq import zmq
from fastdeploy.utils import api_server_logger from fastdeploy.engine.args_utils import EngineArgs
from fastdeploy.utils import FlexibleArgumentParser, api_server_logger
UVICORN_CONFIG = { UVICORN_CONFIG = {
"version": 1, "version": 1,
@@ -201,3 +202,31 @@ class DealerConnectionManager:
self.request_map.clear() self.request_map.clear()
api_server_logger.info("All connections and tasks closed") api_server_logger.info("All connections and tasks closed")
def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
    """Add the API-server options, then the engine options, to ``parser``.

    Args:
        parser: Parser to extend in place.

    Returns:
        Whatever ``EngineArgs.add_cli_args`` returns for the extended parser.
    """
    # (flag, add_argument keyword arguments), registered in this order.
    server_options = (
        ("--port", {"default": 8000, "type": int, "help": "port to the http server"}),
        ("--host", {"default": "0.0.0.0", "type": str, "help": "host to the http server"}),
        ("--workers", {"default": 1, "type": int, "help": "number of workers"}),
        ("--metrics-port", {"default": 8001, "type": int, "help": "port for metrics server"}),
        ("--controller-port", {"default": -1, "type": int, "help": "port for controller server"}),
        (
            "--max-waiting-time",
            {
                "default": -1,
                "type": int,
                "help": "max waiting time for connection, if set value -1 means no waiting time limit",
            },
        ),
        ("--max-concurrency", {"default": 512, "type": int, "help": "max concurrency"}),
        (
            "--enable-mm-output",
            {"action": "store_true", "help": "Enable 'multimodal_content' field in response output. "},
        ),
        (
            "--timeout-graceful-shutdown",
            {
                "default": 0,
                "type": int,
                "help": "timeout for graceful shutdown in seconds (used by uvicorn)",
            },
        ),
    )
    for flag, options in server_options:
        parser.add_argument(flag, **options)

    # Engine-level options are layered on top of the server options.
    return EngineArgs.add_cli_args(parser)

View File

@@ -32,7 +32,7 @@ def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
discovered_plugins = entry_points(group=group) discovered_plugins = entry_points(group=group)
if len(discovered_plugins) == 0: if len(discovered_plugins) == 0:
logger.info("No plugins for group %s found.", group) logger.debug("No plugins for group %s found.", group)
return {} return {}
logger.info("Available plugins for group %s:", group) logger.info("Available plugins for group %s:", group)

View File

@@ -757,6 +757,36 @@ def version():
return content return content
def current_package_version():
    """Extract the fastdeploy version number from the text returned by ``version()``.

    The text is scanned line by line for the first line that starts with
    ``fastdeploy version:``; what follows that marker, stripped of
    surrounding whitespace, is the result.

    Returns:
        str: The fastdeploy version number, or ``"Unknown"`` when
        ``version()`` returns ``"Unknown"``, when no such line exists, or
        when parsing raises.
    """
    unknown = "Unknown"
    marker = "fastdeploy version:"
    try:
        raw_text = version()
        if raw_text == unknown:
            return unknown
        # Lazily pick the first line that carries the marker.
        candidates = (entry for entry in raw_text.strip().split("\n") if entry.startswith(marker))
        first_match = next(candidates, None)
        if first_match is not None:
            return first_match.split(marker)[1].strip()
        llm_logger.warning("fastdeploy version not found in version.txt")
    except Exception as e:
        llm_logger.error(f"Failed to parse fastdeploy version from version.txt: {e}")
    return unknown
class DeprecatedOptionWarning(argparse.Action): class DeprecatedOptionWarning(argparse.Action):
def __init__(self, option_strings, dest, **kwargs): def __init__(self, option_strings, dest, **kwargs):
super().__init__(option_strings, dest, nargs=0, **kwargs) super().__init__(option_strings, dest, nargs=0, **kwargs)

View File

@@ -190,6 +190,16 @@ cmdclass_dict["build_ext"] = CMakeBuild
FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.3.0-dev") FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.3.0-dev")
cmdclass_dict["build_optl"] = PostInstallCommand cmdclass_dict["build_optl"] = PostInstallCommand
def write_version_to_file():
    """Record ``FASTDEPLOY_VERSION`` in ``fastdeploy/version.txt`` next to this script.

    Lines already in the file are kept, except earlier
    ``fastdeploy version:`` entries. Those are dropped so that repeated
    runs leave exactly one entry and a reader that takes the first match
    never sees a stale version. The file is created if it does not exist.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    version_file_path = os.path.join(current_dir, "fastdeploy/version.txt")
    marker = "fastdeploy version:"

    kept_lines = []
    if os.path.exists(version_file_path):
        with open(version_file_path) as f:
            kept_lines = [line for line in f if not line.startswith(marker)]

    # The new entry must start on its own line to be found by a
    # startswith() check, so make sure the preceding line is terminated.
    if kept_lines and not kept_lines[-1].endswith("\n"):
        kept_lines[-1] += "\n"

    with open(version_file_path, "w") as f:
        f.writelines(kept_lines)
        f.write(f"{marker} {FASTDEPLOY_VERSION}\n")
write_version_to_file()
setup( setup(
name=get_name(), name=get_name(),
version=FASTDEPLOY_VERSION, version=FASTDEPLOY_VERSION,

View File

@@ -6,10 +6,8 @@ from fastdeploy.entrypoints.cli.main import main as cli_main
class TestCliMain(unittest.TestCase): class TestCliMain(unittest.TestCase):
@patch("fastdeploy.utils.FlexibleArgumentParser") @patch("fastdeploy.utils.FlexibleArgumentParser")
@patch("fastdeploy.entrypoints.cli.main.importlib.metadata") def test_main_basic(self, mock_parser):
def test_main_basic(self, mock_metadata, mock_parser):
# Setup mocks # Setup mocks
mock_metadata.version.return_value = "1.0.0"
mock_args = MagicMock() mock_args = MagicMock()
mock_args.subparser = None mock_args.subparser = None
mock_parser.return_value.parse_args.return_value = mock_args mock_parser.return_value.parse_args.return_value = mock_args
@@ -18,7 +16,6 @@ class TestCliMain(unittest.TestCase):
cli_main() cli_main()
# Verify version check # Verify version check
mock_metadata.version.assert_called_once_with("fastdeploy-gpu")
mock_args.dispatch_function.assert_called_once() mock_args.dispatch_function.assert_called_once()

View File

@@ -0,0 +1,46 @@
import argparse
import unittest
from unittest.mock import MagicMock, patch
from fastdeploy.entrypoints.cli.serve import ServeSubcommand, cmd_init
class TestServeSubcommand(unittest.TestCase):
    """Tests for ServeSubcommand class."""

    def test_name_property(self):
        """Test the name property is correctly set."""
        self.assertEqual(ServeSubcommand.name, "serve")

    # signal.signal and atexit.register are patched so that running cmd() does
    # not replace the test process's SIGINT/SIGTERM handlers or leave an exit
    # hook behind for later tests.
    @patch("atexit.register")
    @patch("signal.signal")
    @patch("subprocess.Popen", return_value=MagicMock())
    def test_cmd_method(self, mock_subprocess, mock_signal, mock_atexit):
        """Test the cmd method launches the API server module in a child process."""
        test_args = argparse.Namespace(port=8000)
        mock_subprocess.return_value.pid = 1
        ServeSubcommand.cmd(test_args)
        mock_subprocess.assert_called_once()
        # First positional argument of Popen is the command list:
        # [interpreter, "-m", module, *forwarded CLI tokens].
        launched_cmd = mock_subprocess.call_args[0][0]
        self.assertEqual(launched_cmd[1:3], ["-m", "fastdeploy.entrypoints.openai.api_server"])
        mock_subprocess.return_value.wait.assert_called()

    def test_validate_method(self):
        """Test the validate method does nothing (no-op)."""
        test_args = argparse.Namespace()
        instance = ServeSubcommand()
        instance.validate(test_args)  # Should not raise any exceptions

    # NOTE(review): this patch looks unused because `mock_subparsers` is a
    # MagicMock rather than a real _SubParsersAction - confirm and drop it.
    @patch("argparse._SubParsersAction.add_parser")
    def test_subparser_init(self, mock_add_parser):
        """Test the subparser initialization."""
        mock_subparsers = MagicMock()
        instance = ServeSubcommand()
        result = instance.subparser_init(mock_subparsers)
        self.assertIsNotNone(result)
        mock_subparsers.add_parser.assert_called_once()
        self.assertEqual(mock_subparsers.add_parser.call_args[1]["name"], "serve")

    def test_cmd_init_returns_list(self):
        """Test cmd_init returns a list of subcommands."""
        result = cmd_init()
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 1)
        self.assertIsInstance(result[0], ServeSubcommand)
if __name__ == "__main__":
unittest.main()

View File

@@ -1,6 +1,8 @@
import unittest import unittest
from unittest.mock import patch
import fastdeploy import fastdeploy
from fastdeploy.utils import current_package_version
class TestVersion(unittest.TestCase): class TestVersion(unittest.TestCase):
@@ -8,6 +10,24 @@ class TestVersion(unittest.TestCase):
ver = fastdeploy.version() ver = fastdeploy.version()
assert ver.count("COMMIT") > 0 assert ver.count("COMMIT") > 0
@patch("fastdeploy.utils.version")
def test_normal_version(self, mock_version):
    """A ``fastdeploy version:`` line is parsed into the bare version number."""
    mock_version.return_value = "fastdeploy version: 1.0.0\nother info"
    parsed = current_package_version()
    self.assertEqual(parsed, "1.0.0")
@patch("fastdeploy.utils.version")
def test_unknown_version(self, mock_version):
    """``Unknown`` from ``version()`` is passed through unchanged."""
    mock_version.return_value = "Unknown"
    parsed = current_package_version()
    self.assertEqual(parsed, "Unknown")
@patch("fastdeploy.utils.version")
def test_no_version_line(self, mock_version):
    """Text without a ``fastdeploy version:`` line yields ``Unknown``."""
    mock_version.return_value = "some other content"
    parsed = current_package_version()
    self.assertEqual(parsed, "Unknown")
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()