# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import unittest
from unittest import IsolatedAsyncioTestCase
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from fastdeploy.benchmarks.serve import (
    BenchmarkMetrics,
    add_cli_args,
    benchmark,
    calculate_metrics,
    check_goodput_args,
    convert_to_pytorch_benchmark_format,
    get_request,
    save_to_pytorch_benchmark_format,
    write_to_json,
)


class TestServe(IsolatedAsyncioTestCase):
    def test_add_cli_args(self):
        parser = argparse.ArgumentParser()
        add_cli_args(parser)
        args = parser.parse_args(["--model", "test_model"])
        self.assertEqual(args.backend, "openai-chat")
        self.assertEqual(args.host, "127.0.0.1")
        self.assertEqual(args.port, 8000)
        self.assertEqual(args.model, "test_model")

    def test_benchmark_metrics_init(self):
        metrics = BenchmarkMetrics(
            completed=10,
            total_input=100,
            total_output=200,
            request_throughput=5.0,
            request_goodput=4.0,
            output_throughput=10.0,
            total_token_throughput=15.0,
            mean_s_decode=0.5,
            median_s_decode=0.5,
            std_s_decode=0.1,
            percentiles_s_decode=[(99, 0.6)],
            mean_ttft_ms=100.0,
            median_ttft_ms=100.0,
            std_ttft_ms=10.0,
            percentiles_ttft_ms=[(99, 110.0)],
            mean_s_ttft_ms=90.0,
            median_s_ttft_ms=90.0,
            std_s_ttft_ms=9.0,
            percentiles_s_ttft_ms=[(99, 100.0)],
            mean_tpot_ms=50.0,
            median_tpot_ms=50.0,
            std_tpot_ms=5.0,
            percentiles_tpot_ms=[(99, 60.0)],
            mean_itl_ms=20.0,
            median_itl_ms=20.0,
            std_itl_ms=2.0,
            percentiles_itl_ms=[(99, 25.0)],
            mean_s_itl_ms=18.0,
            median_s_itl_ms=18.0,
            std_s_itl_ms=1.8,
            percentiles_s_itl_ms=[(99, 20.0)],
            mean_e2el_ms=500.0,
            median_e2el_ms=500.0,
            std_e2el_ms=50.0,
            percentiles_e2el_ms=[(99, 600.0)],
            mean_s_e2el_ms=450.0,
            median_s_e2el_ms=450.0,
            std_s_e2el_ms=45.0,
            percentiles_s_e2el_ms=[(99, 500.0)],
            mean_input_len=10.0,
            median_input_len=10.0,
            std_input_len=1.0,
            percentiles_input_len=[(99, 12.0)],
            mean_s_input_len=9.0,
            median_s_input_len=9.0,
            std_s_input_len=0.9,
            percentiles_s_input_len=[(99, 10.0)],
            mean_output_len=20.0,
            median_output_len=20.0,
            std_output_len=2.0,
            percentiles_output_len=[(99, 25.0)],
        )
        self.assertEqual(metrics.completed, 10)
        self.assertEqual(metrics.total_input, 100)
        self.assertEqual(metrics.total_output, 200)

    def test_calculate_metrics(self):
        from fastdeploy.benchmarks.datasets import SampleRequest
        from fastdeploy.benchmarks.lib.endpoint_request_func import RequestFuncOutput

        input_requests = [
            SampleRequest(no=1, prompt="test1", prompt_len=10, expected_output_len=20, history_QA=[], json_data=None)
        ]
        outputs = [
            RequestFuncOutput(
                success=True,
                prompt_len=10,
                prompt_tokens=10,
                output_tokens=20,
                ttft=0.1,
                itl=[0.02, 0.02, 0.02],
                latency=0.5,
                arrival_time=[0, 0.1, 0.12, 0.14, 0.16],
                generated_text="test output",
                reasoning_content=None,
                error=None,
            )
        ]
        metrics, _ = calculate_metrics(
            input_requests=input_requests,
            outputs=outputs,
            dur_s=1.0,
            selected_percentiles=[99],
            goodput_config_dict={},
        )
        self.assertEqual(metrics.completed, 1)
        self.assertEqual(metrics.total_input, 10)
        self.assertEqual(metrics.total_output, 20)
    @pytest.mark.asyncio
    @patch("fastdeploy.benchmarks.serve.get_request")
    @patch("asyncio.gather", new_callable=AsyncMock)
    async def test_benchmark(self, mock_gather, mock_get_request):
        # Register a mock request function directly in ASYNC_REQUEST_FUNCS
        from fastdeploy.benchmarks.serve import ASYNC_REQUEST_FUNCS

        mock_func = AsyncMock()
        ASYNC_REQUEST_FUNCS["test_backend"] = mock_func

        from fastdeploy.benchmarks.datasets import SampleRequest

        # Create an async generator function to mock get_request
        async def mock_request_gen():
            yield SampleRequest(
                no=1, prompt="test", prompt_len=10, expected_output_len=20, history_QA=[], json_data=None
            )

        mock_get_request.return_value = mock_request_gen()
        mock_func.return_value = MagicMock(
            success=True,
            prompt_len=10,
            prompt_tokens=10,
            output_tokens=20,
            ttft=0.1,
            itl=[0.02, 0.02, 0.02],
            latency=0.5,
            arrival_time=[0, 0.1, 0.12, 0.14, 0.16],
            generated_text="test output",
            reasoning_content=None,
            error=None,
        )

        result = await benchmark(
            backend="test_backend",
            api_url="http://test",
            base_url="http://test",
            model_id="test_model",
            model_name="test_model",
            input_requests=[
                SampleRequest(
                    no=1, prompt="test", prompt_len=10, expected_output_len=20, history_QA=[], json_data=None
                )
            ],
            hyper_parameters={},
            logprobs=None,
            request_rate=1.0,
            burstiness=1.0,
            disable_tqdm=True,
            profile=False,
            selected_percentile_metrics=["ttft", "tpot", "itl"],
            selected_percentiles=[99],
            ignore_eos=False,
            debug=False,
            goodput_config_dict={},
            max_concurrency=None,
            lora_modules=None,
            extra_body=None,
        )

        self.assertEqual(result["total_input_tokens"], 0)

    @pytest.mark.asyncio
    @patch("asyncio.sleep", new_callable=AsyncMock)
    async def test_get_request(self, mock_sleep):
        from fastdeploy.benchmarks.datasets import SampleRequest

        input_requests = [
            SampleRequest(no=1, prompt="test1", prompt_len=10, expected_output_len=20, history_QA=[], json_data=None),
            SampleRequest(no=2, prompt="test2", prompt_len=15, expected_output_len=25, history_QA=[], json_data=None),
        ]

        # Test infinite request rate
        count = 0
        async for _ in get_request(input_requests, float("inf")):
            count += 1
            if count >= 2:
                break
        self.assertEqual(count, 2)

        # Test finite request rate
        mock_sleep.return_value = None
        count = 0
        async for _ in get_request(input_requests, 1.0, 1.0):
            count += 1
            if count >= 2:
                break
        self.assertEqual(count, 2)
        mock_sleep.assert_called()

    def test_check_goodput_args(self):
        # Test valid goodput args
        class Args:
            goodput = ["ttft:100", "tpot:50"]

        goodput_config = check_goodput_args(Args())
        self.assertEqual(goodput_config["ttft"], 100)
        self.assertEqual(goodput_config["tpot"], 50)

        # Test invalid goodput args
        class InvalidArgs:
            goodput = ["invalid:100"]

        with self.assertRaises(ValueError):
            check_goodput_args(InvalidArgs())

    @patch("os.environ.get", return_value="1")
    def test_convert_to_pytorch_benchmark_format(self, mock_env):
        class Args:
            model = "test_model"

        metrics = {"mean_ttft_ms": [100.0], "median_ttft_ms": [100.0]}
        extra_info = {"tensor_parallel_size": 1}

        records = convert_to_pytorch_benchmark_format(Args(), metrics, extra_info)
        self.assertEqual(len(records), 2)
        self.assertEqual(records[0]["model"]["name"], "test_model")

    @patch("builtins.open", new_callable=MagicMock)
    @patch("json.dump")
    def test_write_to_json(self, mock_dump, mock_open):
        records = [{"test": "data"}]
        write_to_json("test.json", records)
        mock_dump.assert_called_once()

    @patch("os.environ.get", return_value="1")
    @patch("builtins.open", new_callable=MagicMock)
    @patch("json.dump")
    def test_save_to_pytorch_benchmark_format(self, mock_dump, mock_open, mock_env):
        class Args:
            model = "test_model"

        results = {
            "mean_ttft_ms": 100.0,
"median_ttft_ms": 100.0, "std_ttft_ms": 10.0, "p99_ttft_ms": 110.0, "mean_tpot_ms": 50.0, "median_tpot_ms": 50.0, "std_tpot_ms": 5.0, "p99_tpot_ms": 60.0, "median_itl_ms": 20.0, "mean_itl_ms": 20.0, "std_itl_ms": 2.0, "p99_itl_ms": 25.0, } save_to_pytorch_benchmark_format(Args(), results, "test.json") mock_dump.assert_called_once() @pytest.mark.asyncio @patch("builtins.open", new_callable=MagicMock) @patch("yaml.safe_load") @patch("fastdeploy.benchmarks.serve.benchmark", new_callable=AsyncMock) @patch("fastdeploy.benchmarks.serve.get_samples", new_callable=MagicMock) @patch("fastdeploy.benchmarks.serve.add_cli_args") @patch("argparse.ArgumentParser.parse_args") async def test_main_async( self, mock_parse_args, mock_add_cli_args, mock_get_samples, mock_benchmark, mock_safe_load, mock_open ): """Test main_async function with successful execution""" from fastdeploy.benchmarks.datasets import SampleRequest from fastdeploy.benchmarks.serve import main_async # Setup mock args mock_args = MagicMock() mock_args.backend = "openai-chat" # Use openai-compatible backend mock_args.model = "test_model" mock_args.request_rate = float("inf") mock_args.burstiness = 1.0 mock_args.disable_tqdm = True mock_args.profile = False mock_args.ignore_eos = False mock_args.debug = False mock_args.max_concurrency = None mock_args.lora_modules = None mock_args.extra_body = None mock_args.percentile_metrics = "ttft,tpot,itl" mock_args.metric_percentiles = "99" mock_args.goodput = None mock_args.ramp_up_strategy = "1" mock_args.ramp_up_start_rps = 1 mock_args.ramp_up_end_rps = 1 mock_args.dataset_name = "EB" mock_args.dataset_path = MagicMock() mock_args.dataset_split = None mock_args.dataset_sample_ratio = 1.0 mock_args.dataset_shard_size = None mock_args.dataset_shard_rank = None mock_args.dataset_shuffle_seed = None mock_args.top_p = 0.9 # Add sampling parameters for openai-compatible backend mock_args.top_k = 50 mock_args.temperature = 0.7 mock_args.result_dir = MagicMock() # Mock result_dir mock_args.result_filename = MagicMock() # Mock result_filename mock_args.save_result = True # Enable file saving for test mock_args.save_detailed = False mock_args.append_result = False mock_args.hyperparameter_path = "test_params.yaml" mock_parse_args.return_value = mock_args # Mock YAML loading mock_safe_load.return_value = {"param1": "value1", "param2": 42} # Mock file operations mock_file = MagicMock() mock_file.tell.return_value = 100 # Simulate non-empty file for append test mock_open.return_value.__enter__.return_value = mock_file # Mock get_samples return value mock_get_samples.return_value = [ SampleRequest(no=1, prompt="test", prompt_len=10, expected_output_len=20, history_QA=[], json_data=None) ] # Mock benchmark return value with complete JSON-serializable data mock_benchmark.return_value = { "completed": 1, "total_input_tokens": 10, "total_output_tokens": 20, "request_throughput": 1.0, "mean_ttft_ms": 100.0, "median_ttft_ms": 100.0, "std_ttft_ms": 10.0, "p99_ttft_ms": 110.0, "mean_tpot_ms": 50.0, "median_tpot_ms": 50.0, "std_tpot_ms": 5.0, "p99_tpot_ms": 60.0, "median_itl_ms": 20.0, "mean_itl_ms": 20.0, "std_itl_ms": 2.0, "p99_itl_ms": 25.0, "hyper_parameters": {"param1": "value1", "param2": 42}, "input_requests": [ { "no": 1, "prompt": "test", "prompt_len": 10, "expected_output_len": 20, "history_QA": [], "json_data": None, } ], } # Mock json.dump to verify serialization with patch("json.dump") as mock_json_dump: # Call main_async with args await main_async(mock_args) # Verify mocks were called 
            mock_get_samples.assert_called_once()

            # Verify YAML file was loaded
            mock_open.assert_any_call("test_params.yaml", "r")
            mock_safe_load.assert_called_once()

            # Verify json.dump was called with serializable data
            mock_json_dump.assert_called_once()
            args, _ = mock_json_dump.call_args
            self.assertIsInstance(args[0], dict)  # Verify data is dict (JSON-serializable)
            self.assertIn("completed", args[0])  # Verify benchmark results are included


if __name__ == "__main__":
    unittest.main()