diff --git a/fastdeploy/__init__.py b/fastdeploy/__init__.py index edb2aa43a..ebf2eea5b 100644 --- a/fastdeploy/__init__.py +++ b/fastdeploy/__init__.py @@ -28,7 +28,7 @@ from paddleformers.utils.log import logger as pf_logger from fastdeploy.engine.sampling_params import SamplingParams from fastdeploy.entrypoints.llm import LLM -from fastdeploy.utils import envs +from fastdeploy.utils import current_package_version, envs if envs.FD_DEBUG != "1": import logging @@ -43,6 +43,8 @@ except ImportError: pass # TODO(tangbinhan): remove this code +__version__ = current_package_version() + def _patch_fastsafetensors(): try: diff --git a/fastdeploy/entrypoints/cli/main.py b/fastdeploy/entrypoints/cli/main.py index 0686e1e16..b770dc604 100644 --- a/fastdeploy/entrypoints/cli/main.py +++ b/fastdeploy/entrypoints/cli/main.py @@ -17,17 +17,19 @@ # This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/main.py from __future__ import annotations -import importlib.metadata +from fastdeploy import __version__ def main(): import fastdeploy.entrypoints.cli.benchmark.main import fastdeploy.entrypoints.cli.openai + import fastdeploy.entrypoints.cli.serve from fastdeploy.utils import FlexibleArgumentParser CMD_MODULES = [ fastdeploy.entrypoints.cli.openai, fastdeploy.entrypoints.cli.benchmark.main, + fastdeploy.entrypoints.cli.serve, ] parser = FlexibleArgumentParser(description="FastDeploy CLI") @@ -35,7 +37,7 @@ def main(): "-v", "--version", action="version", - version=importlib.metadata.version("fastdeploy-gpu"), + version=__version__, ) subparsers = parser.add_subparsers(required=False, dest="subparser") cmds = {} diff --git a/fastdeploy/entrypoints/cli/openai.py b/fastdeploy/entrypoints/cli/openai.py index 0ab4c9ae0..7a92925ee 100644 --- a/fastdeploy/entrypoints/cli/openai.py +++ b/fastdeploy/entrypoints/cli/openai.py @@ -86,7 +86,7 @@ def _add_query_options(parser: FlexibleArgumentParser) -> FlexibleArgumentParser parser.add_argument( "--url", type=str, - default="http://localhost:9904/v1", + default="http://localhost:8000/v1", help="url of the running OpenAI-Compatible RESTful API server", ) parser.add_argument( diff --git a/fastdeploy/entrypoints/cli/serve.py b/fastdeploy/entrypoints/cli/serve.py new file mode 100644 index 000000000..ead694695 --- /dev/null +++ b/fastdeploy/entrypoints/cli/serve.py @@ -0,0 +1,84 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" + +# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/serve.py + +import argparse +import atexit +import os +import signal +import subprocess +import sys + +from fastdeploy.entrypoints.cli.types import CLISubcommand +from fastdeploy.entrypoints.openai.utils import make_arg_parser +from fastdeploy.utils import FlexibleArgumentParser + + +class ServeSubcommand(CLISubcommand): + """The `serve` subcommand for the fastdeploy CLI.""" + + name = "serve" + + @staticmethod + def cmd(args: argparse.Namespace) -> None: + env = os.environ.copy() + cmd = [ + sys.executable, + "-m", + "fastdeploy.entrypoints.openai.api_server", + *sys.argv[2:], + ] + + # 启动子进程 + proc = subprocess.Popen(cmd, env=env) + print(f"Starting server (PID: {proc.pid})") + + # 定义清理函数 + def cleanup(): + """终止子进程并确保资源释放""" + if proc.poll() is None: # 检查子进程是否仍在运行 + print(f"\nTerminating child process (PID: {proc.pid})...") + proc.terminate() # 发送终止信号 + + # 注册退出时的清理函数 + atexit.register(cleanup) + # 设置信号处理 + + def signal_handler(signum, frame): + cleanup() + sys.exit(0) + + # 捕获 SIGINT (Ctrl+C) 和 SIGTERM + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + # 主进程阻塞等待子进程 + proc.wait() + + def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser: + serve_parser = subparsers.add_parser( + name=self.name, + help="Start the FastDeploy OpenAI Compatible API server.", + description="Start the FastDeploy OpenAI Compatible API server.", + usage="fastdeploy serve [model_tag] [options]", + ) + serve_parser = make_arg_parser(serve_parser) + serve_parser.add_argument("--config", help="Read CLI options from a config file. Must be a YAML file") + return serve_parser + + +def cmd_init() -> list[CLISubcommand]: + return [ServeSubcommand()] diff --git a/fastdeploy/entrypoints/openai/api_server.py b/fastdeploy/entrypoints/openai/api_server.py index 9f90fbf10..25a38b89d 100644 --- a/fastdeploy/entrypoints/openai/api_server.py +++ b/fastdeploy/entrypoints/openai/api_server.py @@ -49,7 +49,7 @@ from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat from fastdeploy.entrypoints.openai.serving_completion import OpenAIServingCompletion from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager -from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG +from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser from fastdeploy.metrics.metrics import ( EXCLUDE_LABELS, cleanup_prometheus_files, @@ -67,31 +67,7 @@ from fastdeploy.utils import ( retrive_model_from_server, ) -parser = FlexibleArgumentParser() -parser.add_argument("--port", default=8000, type=int, help="port to the http server") -parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server") -parser.add_argument("--workers", default=1, type=int, help="number of workers") -parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server") -parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server") -parser.add_argument( - "--max-waiting-time", - default=-1, - type=int, - help="max waiting time for connection, if set value -1 means no waiting time limit", -) -parser.add_argument("--max-concurrency", default=512, type=int, help="max concurrency") - -parser.add_argument( - "--enable-mm-output", action="store_true", help="Enable 'multimodal_content' field in response output. " -) -parser.add_argument( - "--timeout-graceful-shutdown", - default=0, - type=int, - help="timeout for graceful shutdown in seconds (used by uvicorn)", -) - -parser = EngineArgs.add_cli_args(parser) +parser = make_arg_parser(FlexibleArgumentParser()) args = parser.parse_args() console_logger.info(f"Number of api-server workers: {args.workers}.") diff --git a/fastdeploy/entrypoints/openai/utils.py b/fastdeploy/entrypoints/openai/utils.py index 58855f91e..99212e0ee 100644 --- a/fastdeploy/entrypoints/openai/utils.py +++ b/fastdeploy/entrypoints/openai/utils.py @@ -22,7 +22,8 @@ import aiozmq import msgpack import zmq -from fastdeploy.utils import api_server_logger +from fastdeploy.engine.args_utils import EngineArgs +from fastdeploy.utils import FlexibleArgumentParser, api_server_logger UVICORN_CONFIG = { "version": 1, @@ -201,3 +202,31 @@ class DealerConnectionManager: self.request_map.clear() api_server_logger.info("All connections and tasks closed") + + +def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: + parser.add_argument("--port", default=8000, type=int, help="port to the http server") + parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server") + parser.add_argument("--workers", default=1, type=int, help="number of workers") + parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server") + parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server") + parser.add_argument( + "--max-waiting-time", + default=-1, + type=int, + help="max waiting time for connection, if set value -1 means no waiting time limit", + ) + parser.add_argument("--max-concurrency", default=512, type=int, help="max concurrency") + + parser.add_argument( + "--enable-mm-output", action="store_true", help="Enable 'multimodal_content' field in response output. " + ) + parser.add_argument( + "--timeout-graceful-shutdown", + default=0, + type=int, + help="timeout for graceful shutdown in seconds (used by uvicorn)", + ) + + parser = EngineArgs.add_cli_args(parser) + return parser diff --git a/fastdeploy/plugins/utils.py b/fastdeploy/plugins/utils.py index e457223ac..572b1a157 100644 --- a/fastdeploy/plugins/utils.py +++ b/fastdeploy/plugins/utils.py @@ -32,7 +32,7 @@ def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]: discovered_plugins = entry_points(group=group) if len(discovered_plugins) == 0: - logger.info("No plugins for group %s found.", group) + logger.debug("No plugins for group %s found.", group) return {} logger.info("Available plugins for group %s:", group) diff --git a/fastdeploy/utils.py b/fastdeploy/utils.py index 924d283c3..5975f1a5c 100644 --- a/fastdeploy/utils.py +++ b/fastdeploy/utils.py @@ -757,6 +757,36 @@ def version(): return content +def current_package_version(): + """ + 读取version.txt文件,解析出fastdeploy version对应的版本号 + + Args: + Returns: + str: fastdeploy版本号,如果解析失败返回Unknown + """ + fd_version = "Unknown" + try: + content = version() + if content == "Unknown": + return fd_version + + # 按行分割内容 + lines = content.strip().split("\n") + # 查找包含"fastdeploy version:"的行 + for line in lines: + if line.startswith("fastdeploy version:"): + # 提取版本号部分 + fd_version = line.split("fastdeploy version:")[1].strip() + return fd_version + llm_logger.warning("fastdeploy version not found in version.txt") + # 如果没有找到对应的行,返回None + return fd_version + except Exception as e: + llm_logger.error(f"Failed to parse fastdeploy version from version.txt: {e}") + return fd_version + + class DeprecatedOptionWarning(argparse.Action): def __init__(self, option_strings, dest, **kwargs): super().__init__(option_strings, dest, nargs=0, **kwargs) diff --git a/setup.py b/setup.py index 6c79b6826..41cf71e26 100644 --- a/setup.py +++ b/setup.py @@ -190,6 +190,16 @@ cmdclass_dict["build_ext"] = CMakeBuild FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.3.0-dev") cmdclass_dict["build_optl"] = PostInstallCommand + +def write_version_to_file(): + current_dir = os.path.dirname(os.path.abspath(__file__)) + version_file_path = os.path.join(current_dir, "fastdeploy/version.txt") + with open(version_file_path, "a") as f: + f.write(f"fastdeploy version: {FASTDEPLOY_VERSION}\n") + + +write_version_to_file() + setup( name=get_name(), version=FASTDEPLOY_VERSION, diff --git a/tests/entrypoints/cli/test_main.py b/tests/entrypoints/cli/test_main.py index 4b82ecba6..787d3d035 100644 --- a/tests/entrypoints/cli/test_main.py +++ b/tests/entrypoints/cli/test_main.py @@ -6,10 +6,8 @@ from fastdeploy.entrypoints.cli.main import main as cli_main class TestCliMain(unittest.TestCase): @patch("fastdeploy.utils.FlexibleArgumentParser") - @patch("fastdeploy.entrypoints.cli.main.importlib.metadata") - def test_main_basic(self, mock_metadata, mock_parser): + def test_main_basic(self, mock_parser): # Setup mocks - mock_metadata.version.return_value = "1.0.0" mock_args = MagicMock() mock_args.subparser = None mock_parser.return_value.parse_args.return_value = mock_args @@ -18,7 +16,6 @@ class TestCliMain(unittest.TestCase): cli_main() # Verify version check - mock_metadata.version.assert_called_once_with("fastdeploy-gpu") mock_args.dispatch_function.assert_called_once() diff --git a/tests/entrypoints/cli/test_serve.py b/tests/entrypoints/cli/test_serve.py new file mode 100644 index 000000000..9c3235183 --- /dev/null +++ b/tests/entrypoints/cli/test_serve.py @@ -0,0 +1,46 @@ +import argparse +import unittest +from unittest.mock import MagicMock, patch + +from fastdeploy.entrypoints.cli.serve import ServeSubcommand, cmd_init + + +class TestServeSubcommand(unittest.TestCase): + """Tests for ServeSubcommand class.""" + + def test_name_property(self): + """Test the name property is correctly set.""" + self.assertEqual(ServeSubcommand.name, "serve") + + @patch("subprocess.Popen", return_value=MagicMock()) + def test_cmd_method(self, mock_subprocess): + """Test the cmd method calls the expected API server functions.""" + test_args = argparse.Namespace(port=8000) + mock_subprocess.return_value.pid = 1 + ServeSubcommand.cmd(test_args) + mock_subprocess.assert_called_once() + + def test_validate_method(self): + """Test the validate method does nothing (no-op).""" + test_args = argparse.Namespace() + instance = ServeSubcommand() + instance.validate(test_args) # Should not raise any exceptions + + @patch("argparse._SubParsersAction.add_parser") + def test_subparser_init(self, mock_add_parser): + """Test the subparser initialization.""" + mock_subparsers = MagicMock() + instance = ServeSubcommand() + result = instance.subparser_init(mock_subparsers) + self.assertIsNotNone(result) + + def test_cmd_init_returns_list(self): + """Test cmd_init returns a list of subcommands.""" + result = cmd_init() + self.assertIsInstance(result, list) + self.assertEqual(len(result), 1) + self.assertIsInstance(result[0], ServeSubcommand) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/utils/test_version.py b/tests/utils/test_version.py index b5ea2f4a7..ddbd28992 100644 --- a/tests/utils/test_version.py +++ b/tests/utils/test_version.py @@ -1,6 +1,8 @@ import unittest +from unittest.mock import patch import fastdeploy +from fastdeploy.utils import current_package_version class TestVersion(unittest.TestCase): @@ -8,6 +10,24 @@ class TestVersion(unittest.TestCase): ver = fastdeploy.version() assert ver.count("COMMIT") > 0 + @patch("fastdeploy.utils.version") + def test_normal_version(self, mock_version): + """测试正常版本号解析""" + mock_version.return_value = "fastdeploy version: 1.0.0\nother info" + self.assertEqual(current_package_version(), "1.0.0") + + @patch("fastdeploy.utils.version") + def test_unknown_version(self, mock_version): + """测试version返回Unknown的情况""" + mock_version.return_value = "Unknown" + self.assertEqual(current_package_version(), "Unknown") + + @patch("fastdeploy.utils.version") + def test_no_version_line(self, mock_version): + """测试找不到版本行的情况""" + mock_version.return_value = "some other content" + self.assertEqual(current_package_version(), "Unknown") + if __name__ == "__main__": unittest.main()