Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-11-02 12:44:20 +08:00)
[CLI] Update parameters in bench latency CLI tool and fix collect-env CLI tool (#4558)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
* add collect-env
* del files
@@ -83,7 +83,7 @@ def main(args: argparse.Namespace):
     # NOTE(woosuk): If the request cannot be processed in a single batch,
     # the engine will automatically process the request in multiple batches.
     llm = LLM(**dataclasses.asdict(engine_args))
-    assert llm.llm_engine.cfg.max_model_len >= (args.input_len + args.output_len), (
+    assert llm.llm_engine.cfg.model_config.max_model_len >= (args.input_len + args.output_len), (
         "Please ensure that max_model_len is greater than" " the sum of input_len and output_len."
     )
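The only functional change in this hunk is the lookup path: max_model_len now hangs off cfg.model_config instead of the top-level cfg. A minimal self-contained sketch of the same pre-flight length check; Config and ModelConfig here are stand-ins mirroring the nested layout implied by the diff, not FastDeploy's real classes:

    from dataclasses import dataclass

    @dataclass
    class ModelConfig:
        max_model_len: int  # longest prompt + generation the engine accepts

    @dataclass
    class Config:
        model_config: ModelConfig

    def check_request_fits(cfg: Config, input_len: int, output_len: int) -> None:
        # Fail fast before scheduling: the whole request must fit in the
        # model's context window.
        assert cfg.model_config.max_model_len >= (input_len + output_len), (
            "Please ensure that max_model_len is greater than "
            "the sum of input_len and output_len."
        )

    check_request_fits(Config(ModelConfig(2048)), input_len=32, output_len=128)

Checking this up front is cheaper than discovering mid-run that a request cannot fit in the context window.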
@@ -22,6 +22,7 @@ from fastdeploy import __version__

 def main():
     import fastdeploy.entrypoints.cli.benchmark.main
+    import fastdeploy.entrypoints.cli.collect_env
     import fastdeploy.entrypoints.cli.openai
     import fastdeploy.entrypoints.cli.run_batch
     import fastdeploy.entrypoints.cli.serve
@@ -34,6 +35,7 @@ def main():
         fastdeploy.entrypoints.cli.openai,
         fastdeploy.entrypoints.cli.benchmark.main,
         fastdeploy.entrypoints.cli.serve,
+        fastdeploy.entrypoints.cli.collect_env,
     ]

     parser = FlexibleArgumentParser(description="FastDeploy CLI")
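These two hunks register fastdeploy.entrypoints.cli.collect_env alongside the other subcommand modules, which is what exposes collect-env through the fastdeploy CLI entrypoint. A rough sketch of this module-list registration pattern, with assumed names (register_subcommand and _collect_env_main are hypothetical hooks for illustration, not FastDeploy's actual API):

    import argparse

    def _collect_env_main(args: argparse.Namespace) -> None:
        # Placeholder body; the real tool dumps versions, GPU info, etc.
        print("collecting environment info...")

    def register_subcommand(subparsers) -> None:
        # Each CLI module wires up its own subparser, mirroring how the
        # modules in the list above plug into the shared parser.
        parser = subparsers.add_parser("collect-env", help="Dump environment info")
        parser.set_defaults(func=_collect_env_main)

    def main() -> None:
        parser = argparse.ArgumentParser(description="FastDeploy CLI")
        subparsers = parser.add_subparsers(dest="cmd", required=True)
        register_subcommand(subparsers)  # one such call per module in the list
        args = parser.parse_args()
        args.func(args)

    if __name__ == "__main__":
        main()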
@@ -38,7 +38,7 @@ class TestLatency(unittest.TestCase):
         mock_llm_instance = MagicMock()
         mock_llm.return_value = mock_llm_instance
         mock_cfg = MagicMock()
-        mock_cfg.max_model_len = 2048
+        mock_cfg.model_config.max_model_len = 2048
         mock_llm_instance.llm_engine.cfg = mock_cfg

         mock_randint.return_value = np.zeros((8, 32))
@@ -74,7 +74,7 @@ class TestLatency(unittest.TestCase):
         mock_llm_instance = MagicMock()
         mock_llm.return_value = mock_llm_instance
         mock_cfg = MagicMock()
-        mock_cfg.max_model_len = 2048
+        mock_cfg.model_config.max_model_len = 2048
         mock_llm_instance.llm_engine.cfg = mock_cfg

         # Build args using parser
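The test fix mirrors the source change, and only the attribute path needs to move: MagicMock creates nested child mocks on first attribute access, so mock_cfg.model_config.max_model_len = 2048 needs no extra setup for model_config. A small self-contained demonstration of that behavior:

    from unittest.mock import MagicMock

    mock_cfg = MagicMock()
    # Accessing mock_cfg.model_config materializes a child mock on the fly,
    # so the nested assignment works without declaring model_config first.
    mock_cfg.model_config.max_model_len = 2048

    assert mock_cfg.model_config.max_model_len == 2048
    # Attributes that were never set still resolve to fresh child mocks:
    assert isinstance(mock_cfg.model_config.some_other_field, MagicMock)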