[CLI] Update parameters in bench latency cli tool and fix collect-env cli tool (#4558)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled

* add collect-env

* del files
This commit is contained in:
qwes5s5
2025-10-24 16:46:45 +08:00
committed by GitHub
parent 83d45af1f3
commit e02a812880
3 changed files with 5 additions and 3 deletions

View File

@@ -83,7 +83,7 @@ def main(args: argparse.Namespace):
# NOTE(woosuk): If the request cannot be processed in a single batch,
# the engine will automatically process the request in multiple batches.
llm = LLM(**dataclasses.asdict(engine_args))
-    assert llm.llm_engine.cfg.max_model_len >= (args.input_len + args.output_len), (
+    assert llm.llm_engine.cfg.model_config.max_model_len >= (args.input_len + args.output_len), (
"Please ensure that max_model_len is greater than" " the sum of input_len and output_len."
)

View File

@@ -22,6 +22,7 @@ from fastdeploy import __version__
def main():
import fastdeploy.entrypoints.cli.benchmark.main
+    import fastdeploy.entrypoints.cli.collect_env
import fastdeploy.entrypoints.cli.openai
import fastdeploy.entrypoints.cli.run_batch
import fastdeploy.entrypoints.cli.serve
@@ -34,6 +35,7 @@ def main():
fastdeploy.entrypoints.cli.openai,
fastdeploy.entrypoints.cli.benchmark.main,
fastdeploy.entrypoints.cli.serve,
+        fastdeploy.entrypoints.cli.collect_env,
]
parser = FlexibleArgumentParser(description="FastDeploy CLI")

View File

@@ -38,7 +38,7 @@ class TestLatency(unittest.TestCase):
mock_llm_instance = MagicMock()
mock_llm.return_value = mock_llm_instance
mock_cfg = MagicMock()
-        mock_cfg.max_model_len = 2048
+        mock_cfg.model_config.max_model_len = 2048
mock_llm_instance.llm_engine.cfg = mock_cfg
mock_randint.return_value = np.zeros((8, 32))
@@ -74,7 +74,7 @@ class TestLatency(unittest.TestCase):
mock_llm_instance = MagicMock()
mock_llm.return_value = mock_llm_instance
mock_cfg = MagicMock()
-        mock_cfg.max_model_len = 2048
+        mock_cfg.model_config.max_model_len = 2048
mock_llm_instance.llm_engine.cfg = mock_cfg
# Build args using parser