Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-11-02 12:44:20 +08:00)
[CLI] Update parameters in bench latency CLI tool and fix collect-env CLI tool (#4558)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
* add collect-env
* del files
@@ -83,7 +83,7 @@ def main(args: argparse.Namespace):
     # NOTE(woosuk): If the request cannot be processed in a single batch,
     # the engine will automatically process the request in multiple batches.
     llm = LLM(**dataclasses.asdict(engine_args))
-    assert llm.llm_engine.cfg.max_model_len >= (args.input_len + args.output_len), (
+    assert llm.llm_engine.cfg.model_config.max_model_len >= (args.input_len + args.output_len), (
         "Please ensure that max_model_len is greater than" " the sum of input_len and output_len."
     )
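The only functional change in this hunk is the lookup path: max_model_len now hangs off cfg.model_config instead of the top-level cfg. A minimal self-contained sketch of the same pre-flight length check; Config and ModelConfig here are stand-ins mirroring the nested layout implied by the diff, not FastDeploy's real classes:

    from dataclasses import dataclass

    @dataclass
    class ModelConfig:
        max_model_len: int  # longest prompt + generation the engine accepts

    @dataclass
    class Config:
        model_config: ModelConfig

    def check_request_fits(cfg: Config, input_len: int, output_len: int) -> None:
        # Fail fast before scheduling: the whole request must fit in the
        # model's context window.
        assert cfg.model_config.max_model_len >= (input_len + output_len), (
            "Please ensure that max_model_len is greater than "
            "the sum of input_len and output_len."
        )

    check_request_fits(Config(ModelConfig(2048)), input_len=32, output_len=128)

Checking this up front is cheaper than discovering mid-run that a request cannot fit in the context window.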
@@ -22,6 +22,7 @@ from fastdeploy import __version__

 def main():
     import fastdeploy.entrypoints.cli.benchmark.main
+    import fastdeploy.entrypoints.cli.collect_env
     import fastdeploy.entrypoints.cli.openai
     import fastdeploy.entrypoints.cli.run_batch
     import fastdeploy.entrypoints.cli.serve
@@ -34,6 +35,7 @@ def main():
         fastdeploy.entrypoints.cli.openai,
         fastdeploy.entrypoints.cli.benchmark.main,
         fastdeploy.entrypoints.cli.serve,
+        fastdeploy.entrypoints.cli.collect_env,
     ]

     parser = FlexibleArgumentParser(description="FastDeploy CLI")
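These two hunks register fastdeploy.entrypoints.cli.collect_env alongside the other subcommand modules, which is what exposes collect-env through the fastdeploy CLI entrypoint. A rough sketch of this module-list registration pattern, with assumed names (register_subcommand and _collect_env_main are hypothetical hooks for illustration, not FastDeploy's actual API):

    import argparse

    def _collect_env_main(args: argparse.Namespace) -> None:
        # Placeholder body; the real tool dumps versions, GPU info, etc.
        print("collecting environment info...")

    def register_subcommand(subparsers) -> None:
        # Each CLI module wires up its own subparser, mirroring how the
        # modules in the list above plug into the shared parser.
        parser = subparsers.add_parser("collect-env", help="Dump environment info")
        parser.set_defaults(func=_collect_env_main)

    def main() -> None:
        parser = argparse.ArgumentParser(description="FastDeploy CLI")
        subparsers = parser.add_subparsers(dest="cmd", required=True)
        register_subcommand(subparsers)  # one such call per module in the list
        args = parser.parse_args()
        args.func(args)

    if __name__ == "__main__":
        main()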
@@ -38,7 +38,7 @@ class TestLatency(unittest.TestCase):
         mock_llm_instance = MagicMock()
         mock_llm.return_value = mock_llm_instance
         mock_cfg = MagicMock()
-        mock_cfg.max_model_len = 2048
+        mock_cfg.model_config.max_model_len = 2048
         mock_llm_instance.llm_engine.cfg = mock_cfg

         mock_randint.return_value = np.zeros((8, 32))
@@ -74,7 +74,7 @@ class TestLatency(unittest.TestCase):
         mock_llm_instance = MagicMock()
         mock_llm.return_value = mock_llm_instance
         mock_cfg = MagicMock()
-        mock_cfg.max_model_len = 2048
+        mock_cfg.model_config.max_model_len = 2048
         mock_llm_instance.llm_engine.cfg = mock_cfg

         # Build args using parser
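The test fix mirrors the source change, and only the attribute path needs to move: MagicMock creates nested child mocks on first attribute access, so mock_cfg.model_config.max_model_len = 2048 needs no extra setup for model_config. A small self-contained demonstration of that behavior:

    from unittest.mock import MagicMock

    mock_cfg = MagicMock()
    # Accessing mock_cfg.model_config materializes a child mock on the fly,
    # so the nested assignment works without declaring model_config first.
    mock_cfg.model_config.max_model_len = 2048

    assert mock_cfg.model_config.max_model_len == 2048
    # Attributes that were never set still resolve to fresh child mocks:
    assert isinstance(mock_cfg.model_config.some_other_field, MagicMock)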