diff --git a/fastdeploy/benchmarks/latency.py b/fastdeploy/benchmarks/latency.py
index e750b225e..980a2f7ec 100644
--- a/fastdeploy/benchmarks/latency.py
+++ b/fastdeploy/benchmarks/latency.py
@@ -83,7 +83,7 @@ def main(args: argparse.Namespace):
     # NOTE(woosuk): If the request cannot be processed in a single batch,
     # the engine will automatically process the request in multiple batches.
     llm = LLM(**dataclasses.asdict(engine_args))
-    assert llm.llm_engine.cfg.max_model_len >= (args.input_len + args.output_len), (
+    assert llm.llm_engine.cfg.model_config.max_model_len >= (args.input_len + args.output_len), (
         "Please ensure that max_model_len is greater than" " the sum of input_len and output_len."
     )
 
diff --git a/fastdeploy/entrypoints/cli/main.py b/fastdeploy/entrypoints/cli/main.py
index de7e7d1a4..28d20bb66 100644
--- a/fastdeploy/entrypoints/cli/main.py
+++ b/fastdeploy/entrypoints/cli/main.py
@@ -22,6 +22,7 @@ from fastdeploy import __version__
 
 def main():
     import fastdeploy.entrypoints.cli.benchmark.main
+    import fastdeploy.entrypoints.cli.collect_env
     import fastdeploy.entrypoints.cli.openai
     import fastdeploy.entrypoints.cli.run_batch
     import fastdeploy.entrypoints.cli.serve
@@ -34,6 +35,7 @@ def main():
         fastdeploy.entrypoints.cli.openai,
         fastdeploy.entrypoints.cli.benchmark.main,
         fastdeploy.entrypoints.cli.serve,
+        fastdeploy.entrypoints.cli.collect_env,
     ]
 
     parser = FlexibleArgumentParser(description="FastDeploy CLI")
diff --git a/tests/benchmarks/test_latency_benchmarks.py b/tests/benchmarks/test_latency_benchmarks.py
index 6d92b9366..4126d4381 100644
--- a/tests/benchmarks/test_latency_benchmarks.py
+++ b/tests/benchmarks/test_latency_benchmarks.py
@@ -38,7 +38,7 @@ class TestLatency(unittest.TestCase):
         mock_llm_instance = MagicMock()
         mock_llm.return_value = mock_llm_instance
         mock_cfg = MagicMock()
-        mock_cfg.max_model_len = 2048
+        mock_cfg.model_config.max_model_len = 2048
         mock_llm_instance.llm_engine.cfg = mock_cfg
 
         mock_randint.return_value = np.zeros((8, 32))
@@ -74,7 +74,7 @@ class TestLatency(unittest.TestCase):
         mock_llm_instance = MagicMock()
         mock_llm.return_value = mock_llm_instance
         mock_cfg = MagicMock()
-        mock_cfg.max_model_len = 2048
+        mock_cfg.model_config.max_model_len = 2048
         mock_llm_instance.llm_engine.cfg = mock_cfg
 
         # Build args using parser