mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Feature] add cli command serve (#4226)
This commit is contained in:
@@ -28,7 +28,7 @@ from paddleformers.utils.log import logger as pf_logger
|
|||||||
|
|
||||||
from fastdeploy.engine.sampling_params import SamplingParams
|
from fastdeploy.engine.sampling_params import SamplingParams
|
||||||
from fastdeploy.entrypoints.llm import LLM
|
from fastdeploy.entrypoints.llm import LLM
|
||||||
from fastdeploy.utils import envs
|
from fastdeploy.utils import current_package_version, envs
|
||||||
|
|
||||||
if envs.FD_DEBUG != "1":
|
if envs.FD_DEBUG != "1":
|
||||||
import logging
|
import logging
|
||||||
@@ -43,6 +43,8 @@ except ImportError:
|
|||||||
pass
|
pass
|
||||||
# TODO(tangbinhan): remove this code
|
# TODO(tangbinhan): remove this code
|
||||||
|
|
||||||
|
__version__ = current_package_version()
|
||||||
|
|
||||||
|
|
||||||
def _patch_fastsafetensors():
|
def _patch_fastsafetensors():
|
||||||
try:
|
try:
|
||||||
|
@@ -17,17 +17,19 @@
|
|||||||
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/main.py
|
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/main.py
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import importlib.metadata
|
from fastdeploy import __version__
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
import fastdeploy.entrypoints.cli.benchmark.main
|
import fastdeploy.entrypoints.cli.benchmark.main
|
||||||
import fastdeploy.entrypoints.cli.openai
|
import fastdeploy.entrypoints.cli.openai
|
||||||
|
import fastdeploy.entrypoints.cli.serve
|
||||||
from fastdeploy.utils import FlexibleArgumentParser
|
from fastdeploy.utils import FlexibleArgumentParser
|
||||||
|
|
||||||
CMD_MODULES = [
|
CMD_MODULES = [
|
||||||
fastdeploy.entrypoints.cli.openai,
|
fastdeploy.entrypoints.cli.openai,
|
||||||
fastdeploy.entrypoints.cli.benchmark.main,
|
fastdeploy.entrypoints.cli.benchmark.main,
|
||||||
|
fastdeploy.entrypoints.cli.serve,
|
||||||
]
|
]
|
||||||
|
|
||||||
parser = FlexibleArgumentParser(description="FastDeploy CLI")
|
parser = FlexibleArgumentParser(description="FastDeploy CLI")
|
||||||
@@ -35,7 +37,7 @@ def main():
|
|||||||
"-v",
|
"-v",
|
||||||
"--version",
|
"--version",
|
||||||
action="version",
|
action="version",
|
||||||
version=importlib.metadata.version("fastdeploy-gpu"),
|
version=__version__,
|
||||||
)
|
)
|
||||||
subparsers = parser.add_subparsers(required=False, dest="subparser")
|
subparsers = parser.add_subparsers(required=False, dest="subparser")
|
||||||
cmds = {}
|
cmds = {}
|
||||||
|
@@ -86,7 +86,7 @@ def _add_query_options(parser: FlexibleArgumentParser) -> FlexibleArgumentParser
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--url",
|
"--url",
|
||||||
type=str,
|
type=str,
|
||||||
default="http://localhost:9904/v1",
|
default="http://localhost:8000/v1",
|
||||||
help="url of the running OpenAI-Compatible RESTful API server",
|
help="url of the running OpenAI-Compatible RESTful API server",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
84
fastdeploy/entrypoints/cli/serve.py
Normal file
84
fastdeploy/entrypoints/cli/serve.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
"""
|
||||||
|
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/serve.py
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import atexit
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from fastdeploy.entrypoints.cli.types import CLISubcommand
|
||||||
|
from fastdeploy.entrypoints.openai.utils import make_arg_parser
|
||||||
|
from fastdeploy.utils import FlexibleArgumentParser
|
||||||
|
|
||||||
|
|
||||||
|
class ServeSubcommand(CLISubcommand):
    """The `serve` subcommand for the fastdeploy CLI."""

    name = "serve"

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        """Run the OpenAI-compatible API server in a child process and wait for it.

        The child is started as ``python -m fastdeploy.entrypoints.openai.api_server``
        and receives everything after the first two entries of ``sys.argv``
        verbatim, so the server process parses the options itself.

        Args:
            args: Parsed CLI namespace. It is not read here; the raw command
                line is forwarded to the child instead.
        """
        # Seconds the child gets to exit after SIGTERM before it is killed.
        grace_seconds = 30

        env = os.environ.copy()
        cmd = [
            sys.executable,
            "-m",
            "fastdeploy.entrypoints.openai.api_server",
            *sys.argv[2:],
        ]

        # Start the server as a child process.
        proc = subprocess.Popen(cmd, env=env)
        print(f"Starting server (PID: {proc.pid})")

        def cleanup():
            """Terminate the child if it is still running and wait until it is gone."""
            if proc.poll() is not None:  # child has already exited
                return
            print(f"\nTerminating child process (PID: {proc.pid})...")
            proc.terminate()  # ask the child to shut down
            try:
                # Reap the child so this process does not return while the
                # server is still running.
                proc.wait(timeout=grace_seconds)
            except subprocess.TimeoutExpired:
                # The child did not exit in time: force it.
                proc.kill()
                proc.wait()

        # Run the cleanup on interpreter exit as well.
        atexit.register(cleanup)

        def signal_handler(signum, frame):
            """Stop the child, then exit this process."""
            cleanup()
            sys.exit(0)

        # Handle SIGINT (Ctrl+C) and SIGTERM.
        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        # Block until the child exits.
        proc.wait()

    def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        """Create the `serve` sub-parser and register its options.

        Args:
            subparsers: Sub-parser collection of the top-level CLI parser.

        Returns:
            The `serve` parser, populated by ``make_arg_parser`` plus a
            ``--config`` option.
        """
        serve_parser = subparsers.add_parser(
            name=self.name,
            help="Start the FastDeploy OpenAI Compatible API server.",
            description="Start the FastDeploy OpenAI Compatible API server.",
            usage="fastdeploy serve [model_tag] [options]",
        )
        serve_parser = make_arg_parser(serve_parser)
        serve_parser.add_argument("--config", help="Read CLI options from a config file. Must be a YAML file")
        return serve_parser
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_init() -> list[CLISubcommand]:
    """Return the subcommand instances this module contributes to the CLI."""
    serve_subcommand = ServeSubcommand()
    return [serve_subcommand]
|
@@ -49,7 +49,7 @@ from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
|
|||||||
from fastdeploy.entrypoints.openai.serving_completion import OpenAIServingCompletion
|
from fastdeploy.entrypoints.openai.serving_completion import OpenAIServingCompletion
|
||||||
from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels
|
from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels
|
||||||
from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
|
from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
|
||||||
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG
|
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser
|
||||||
from fastdeploy.metrics.metrics import (
|
from fastdeploy.metrics.metrics import (
|
||||||
EXCLUDE_LABELS,
|
EXCLUDE_LABELS,
|
||||||
cleanup_prometheus_files,
|
cleanup_prometheus_files,
|
||||||
@@ -67,31 +67,7 @@ from fastdeploy.utils import (
|
|||||||
retrive_model_from_server,
|
retrive_model_from_server,
|
||||||
)
|
)
|
||||||
|
|
||||||
parser = FlexibleArgumentParser()
|
parser = make_arg_parser(FlexibleArgumentParser())
|
||||||
parser.add_argument("--port", default=8000, type=int, help="port to the http server")
|
|
||||||
parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
|
|
||||||
parser.add_argument("--workers", default=1, type=int, help="number of workers")
|
|
||||||
parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server")
|
|
||||||
parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
|
|
||||||
parser.add_argument(
|
|
||||||
"--max-waiting-time",
|
|
||||||
default=-1,
|
|
||||||
type=int,
|
|
||||||
help="max waiting time for connection, if set value -1 means no waiting time limit",
|
|
||||||
)
|
|
||||||
parser.add_argument("--max-concurrency", default=512, type=int, help="max concurrency")
|
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"--enable-mm-output", action="store_true", help="Enable 'multimodal_content' field in response output. "
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--timeout-graceful-shutdown",
|
|
||||||
default=0,
|
|
||||||
type=int,
|
|
||||||
help="timeout for graceful shutdown in seconds (used by uvicorn)",
|
|
||||||
)
|
|
||||||
|
|
||||||
parser = EngineArgs.add_cli_args(parser)
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
console_logger.info(f"Number of api-server workers: {args.workers}.")
|
console_logger.info(f"Number of api-server workers: {args.workers}.")
|
||||||
|
@@ -22,7 +22,8 @@ import aiozmq
|
|||||||
import msgpack
|
import msgpack
|
||||||
import zmq
|
import zmq
|
||||||
|
|
||||||
from fastdeploy.utils import api_server_logger
|
from fastdeploy.engine.args_utils import EngineArgs
|
||||||
|
from fastdeploy.utils import FlexibleArgumentParser, api_server_logger
|
||||||
|
|
||||||
UVICORN_CONFIG = {
|
UVICORN_CONFIG = {
|
||||||
"version": 1,
|
"version": 1,
|
||||||
@@ -201,3 +202,31 @@ class DealerConnectionManager:
|
|||||||
self.request_map.clear()
|
self.request_map.clear()
|
||||||
|
|
||||||
api_server_logger.info("All connections and tasks closed")
|
api_server_logger.info("All connections and tasks closed")
|
||||||
|
|
||||||
|
|
||||||
|
def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
    """Register the API-server options on ``parser``, followed by the engine options.

    Args:
        parser: Parser that receives the arguments.

    Returns:
        The parser returned by ``EngineArgs.add_cli_args``.
    """
    add = parser.add_argument

    # (flag, default, type, help) for the plain valued options, in registration order.
    valued_options = (
        ("--port", 8000, int, "port to the http server"),
        ("--host", "0.0.0.0", str, "host to the http server"),
        ("--workers", 1, int, "number of workers"),
        ("--metrics-port", 8001, int, "port for metrics server"),
        ("--controller-port", -1, int, "port for controller server"),
        (
            "--max-waiting-time",
            -1,
            int,
            "max waiting time for connection, if set value -1 means no waiting time limit",
        ),
        ("--max-concurrency", 512, int, "max concurrency"),
    )
    for flag, default, value_type, help_text in valued_options:
        add(flag, default=default, type=value_type, help=help_text)

    add("--enable-mm-output", action="store_true", help="Enable 'multimodal_content' field in response output. ")
    add(
        "--timeout-graceful-shutdown",
        default=0,
        type=int,
        help="timeout for graceful shutdown in seconds (used by uvicorn)",
    )

    # Engine options are appended last.
    return EngineArgs.add_cli_args(parser)
|
||||||
|
@@ -32,7 +32,7 @@ def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
|
|||||||
|
|
||||||
discovered_plugins = entry_points(group=group)
|
discovered_plugins = entry_points(group=group)
|
||||||
if len(discovered_plugins) == 0:
|
if len(discovered_plugins) == 0:
|
||||||
logger.info("No plugins for group %s found.", group)
|
logger.debug("No plugins for group %s found.", group)
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
logger.info("Available plugins for group %s:", group)
|
logger.info("Available plugins for group %s:", group)
|
||||||
|
@@ -757,6 +757,36 @@ def version():
|
|||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
def current_package_version():
    """
    Return the FastDeploy version recorded in version.txt.

    The text returned by ``version()`` is searched for a line of the form
    ``fastdeploy version: <value>``.

    Args:
    Returns:
        str: the FastDeploy version, or "Unknown" when ``version()`` returns
            "Unknown", when no non-empty version entry exists, or when
            parsing fails.
    """
    prefix = "fastdeploy version:"
    fd_version = "Unknown"
    try:
        content = version()
        if content == "Unknown":
            return fd_version

        # If several entries are present, the last one is the most recently
        # written, so scan from the end. splitlines() and strip() tolerate
        # CRLF line endings and indented lines.
        for raw_line in reversed(content.splitlines()):
            line = raw_line.strip()
            if not line.startswith(prefix):
                continue
            parsed = line[len(prefix):].strip()
            if parsed:  # an entry with no value is treated as missing
                return parsed

        llm_logger.warning("fastdeploy version not found in version.txt")
        # No usable entry was found: fall back to "Unknown".
        return fd_version
    except Exception as e:
        # Never raise: report the problem and fall back to "Unknown".
        llm_logger.error(f"Failed to parse fastdeploy version from version.txt: {e}")
        return fd_version
|
||||||
|
|
||||||
|
|
||||||
class DeprecatedOptionWarning(argparse.Action):
|
class DeprecatedOptionWarning(argparse.Action):
|
||||||
def __init__(self, option_strings, dest, **kwargs):
|
def __init__(self, option_strings, dest, **kwargs):
|
||||||
super().__init__(option_strings, dest, nargs=0, **kwargs)
|
super().__init__(option_strings, dest, nargs=0, **kwargs)
|
||||||
|
10
setup.py
10
setup.py
@@ -190,6 +190,16 @@ cmdclass_dict["build_ext"] = CMakeBuild
|
|||||||
FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.3.0-dev")
|
FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.3.0-dev")
|
||||||
cmdclass_dict["build_optl"] = PostInstallCommand
|
cmdclass_dict["build_optl"] = PostInstallCommand
|
||||||
|
|
||||||
|
|
||||||
|
def write_version_to_file():
    """Record FASTDEPLOY_VERSION in ``fastdeploy/version.txt`` next to this script.

    All other lines already in the file are kept. Any earlier
    ``fastdeploy version:`` lines are dropped first, so the file holds exactly
    one entry however many times this script runs. The file is created if it
    does not exist yet.
    """
    prefix = "fastdeploy version:"
    current_dir = os.path.dirname(os.path.abspath(__file__))
    version_file_path = os.path.join(current_dir, "fastdeploy/version.txt")

    try:
        with open(version_file_path, encoding="utf-8") as f:
            kept_lines = [line for line in f.read().splitlines() if not line.startswith(prefix)]
    except FileNotFoundError:
        kept_lines = []

    kept_lines.append(f"{prefix} {FASTDEPLOY_VERSION}")

    # Rewrite the whole file so the entry always sits on its own line, even
    # when the previous content did not end with a newline.
    with open(version_file_path, "w", encoding="utf-8") as f:
        f.write("\n".join(kept_lines) + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
write_version_to_file()
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name=get_name(),
|
name=get_name(),
|
||||||
version=FASTDEPLOY_VERSION,
|
version=FASTDEPLOY_VERSION,
|
||||||
|
@@ -6,10 +6,8 @@ from fastdeploy.entrypoints.cli.main import main as cli_main
|
|||||||
|
|
||||||
class TestCliMain(unittest.TestCase):
|
class TestCliMain(unittest.TestCase):
|
||||||
@patch("fastdeploy.utils.FlexibleArgumentParser")
|
@patch("fastdeploy.utils.FlexibleArgumentParser")
|
||||||
@patch("fastdeploy.entrypoints.cli.main.importlib.metadata")
|
def test_main_basic(self, mock_parser):
|
||||||
def test_main_basic(self, mock_metadata, mock_parser):
|
|
||||||
# Setup mocks
|
# Setup mocks
|
||||||
mock_metadata.version.return_value = "1.0.0"
|
|
||||||
mock_args = MagicMock()
|
mock_args = MagicMock()
|
||||||
mock_args.subparser = None
|
mock_args.subparser = None
|
||||||
mock_parser.return_value.parse_args.return_value = mock_args
|
mock_parser.return_value.parse_args.return_value = mock_args
|
||||||
@@ -18,7 +16,6 @@ class TestCliMain(unittest.TestCase):
|
|||||||
cli_main()
|
cli_main()
|
||||||
|
|
||||||
# Verify version check
|
# Verify version check
|
||||||
mock_metadata.version.assert_called_once_with("fastdeploy-gpu")
|
|
||||||
mock_args.dispatch_function.assert_called_once()
|
mock_args.dispatch_function.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
46
tests/entrypoints/cli/test_serve.py
Normal file
46
tests/entrypoints/cli/test_serve.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
import argparse
|
||||||
|
import unittest
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from fastdeploy.entrypoints.cli.serve import ServeSubcommand, cmd_init
|
||||||
|
|
||||||
|
|
||||||
|
class TestServeSubcommand(unittest.TestCase):
    """Tests for ServeSubcommand class."""

    def test_name_property(self):
        """Test the name property is correctly set."""
        self.assertEqual(ServeSubcommand.name, "serve")

    # signal.signal and atexit.register are patched so that cmd() does not
    # leave real SIGINT/SIGTERM handlers or an exit hook installed in the
    # test process after this test finishes.
    @patch("atexit.register")
    @patch("signal.signal")
    @patch("subprocess.Popen", return_value=MagicMock())
    def test_cmd_method(self, mock_subprocess, mock_signal, mock_atexit):
        """Test the cmd method launches the API server module and waits for it."""
        test_args = argparse.Namespace(port=8000)
        mock_subprocess.return_value.pid = 1
        ServeSubcommand.cmd(test_args)

        mock_subprocess.assert_called_once()
        # First positional argument of Popen is the command list:
        # [<python executable>, "-m", <server module>, *forwarded args].
        launched = mock_subprocess.call_args[0][0]
        self.assertEqual(launched[1:3], ["-m", "fastdeploy.entrypoints.openai.api_server"])
        mock_subprocess.return_value.wait.assert_called_once()
        mock_atexit.assert_called_once()
        # One handler each for SIGINT and SIGTERM.
        self.assertEqual(mock_signal.call_count, 2)

    def test_validate_method(self):
        """Test the validate method does nothing (no-op)."""
        test_args = argparse.Namespace()
        instance = ServeSubcommand()
        instance.validate(test_args)  # Should not raise any exceptions

    def test_subparser_init(self):
        """Test the subparser initialization."""
        mock_subparsers = MagicMock()
        instance = ServeSubcommand()
        result = instance.subparser_init(mock_subparsers)
        self.assertIsNotNone(result)
        mock_subparsers.add_parser.assert_called_once()
        self.assertEqual(mock_subparsers.add_parser.call_args[1]["name"], "serve")

    def test_cmd_init_returns_list(self):
        """Test cmd_init returns a list of subcommands."""
        result = cmd_init()
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 1)
        self.assertIsInstance(result[0], ServeSubcommand)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
@@ -1,6 +1,8 @@
|
|||||||
import unittest
|
import unittest
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
import fastdeploy
|
import fastdeploy
|
||||||
|
from fastdeploy.utils import current_package_version
|
||||||
|
|
||||||
|
|
||||||
class TestVersion(unittest.TestCase):
|
class TestVersion(unittest.TestCase):
|
||||||
@@ -8,6 +10,24 @@ class TestVersion(unittest.TestCase):
|
|||||||
ver = fastdeploy.version()
|
ver = fastdeploy.version()
|
||||||
assert ver.count("COMMIT") > 0
|
assert ver.count("COMMIT") > 0
|
||||||
|
|
||||||
|
@patch("fastdeploy.utils.version")
def test_normal_version(self, mock_version):
    """A well-formed version line yields its version string."""
    mock_version.return_value = "fastdeploy version: 1.0.0\nother info"
    parsed = current_package_version()
    self.assertEqual(parsed, "1.0.0")
|
||||||
|
|
||||||
|
@patch("fastdeploy.utils.version")
def test_unknown_version(self, mock_version):
    """When version() reports "Unknown", the result is "Unknown" as well."""
    mock_version.return_value = "Unknown"
    parsed = current_package_version()
    self.assertEqual(parsed, "Unknown")
|
||||||
|
|
||||||
|
@patch("fastdeploy.utils.version")
def test_no_version_line(self, mock_version):
    """Content without a version line yields "Unknown"."""
    mock_version.return_value = "some other content"
    parsed = current_package_version()
    self.assertEqual(parsed, "Unknown")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
Reference in New Issue
Block a user