diff --git a/fastdeploy/benchmarks/latency.py b/fastdeploy/benchmarks/latency.py index e750b225e..980a2f7ec 100644 --- a/fastdeploy/benchmarks/latency.py +++ b/fastdeploy/benchmarks/latency.py @@ -83,7 +83,7 @@ def main(args: argparse.Namespace): # NOTE(woosuk): If the request cannot be processed in a single batch, # the engine will automatically process the request in multiple batches. llm = LLM(**dataclasses.asdict(engine_args)) - assert llm.llm_engine.cfg.max_model_len >= (args.input_len + args.output_len), ( + assert llm.llm_engine.cfg.model_config.max_model_len >= (args.input_len + args.output_len), ( "Please ensure that max_model_len is greater than" " the sum of input_len and output_len." ) diff --git a/fastdeploy/entrypoints/cli/main.py b/fastdeploy/entrypoints/cli/main.py index de7e7d1a4..28d20bb66 100644 --- a/fastdeploy/entrypoints/cli/main.py +++ b/fastdeploy/entrypoints/cli/main.py @@ -22,6 +22,7 @@ from fastdeploy import __version__ def main(): import fastdeploy.entrypoints.cli.benchmark.main + import fastdeploy.entrypoints.cli.collect_env import fastdeploy.entrypoints.cli.openai import fastdeploy.entrypoints.cli.run_batch import fastdeploy.entrypoints.cli.serve @@ -34,6 +35,7 @@ def main(): fastdeploy.entrypoints.cli.openai, fastdeploy.entrypoints.cli.benchmark.main, fastdeploy.entrypoints.cli.serve, + fastdeploy.entrypoints.cli.collect_env, ] parser = FlexibleArgumentParser(description="FastDeploy CLI") diff --git a/tests/benchmarks/test_latency_benchmarks.py b/tests/benchmarks/test_latency_benchmarks.py index 6d92b9366..4126d4381 100644 --- a/tests/benchmarks/test_latency_benchmarks.py +++ b/tests/benchmarks/test_latency_benchmarks.py @@ -38,7 +38,7 @@ class TestLatency(unittest.TestCase): mock_llm_instance = MagicMock() mock_llm.return_value = mock_llm_instance mock_cfg = MagicMock() - mock_cfg.max_model_len = 2048 + mock_cfg.model_config.max_model_len = 2048 mock_llm_instance.llm_engine.cfg = mock_cfg mock_randint.return_value = np.zeros((8, 32)) @@ -74,7 +74,7 @@ class TestLatency(unittest.TestCase): mock_llm_instance = MagicMock() mock_llm.return_value = mock_llm_instance mock_cfg = MagicMock() - mock_cfg.max_model_len = 2048 + mock_cfg.model_config.max_model_len = 2048 mock_llm_instance.llm_engine.cfg = mock_cfg # Build args using parser