Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-11-03 11:02:01 +08:00
[FDConfig] Remove max_num_batched_tokens/max_num_seqs from ParallelConfig (#4116)

* remove max_num_batched_tokens in parallel config
* remove max_num_seqs
* update test case
* fix test
* fix

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
@@ -2,7 +2,7 @@ from dataclasses import asdict
 from types import SimpleNamespace
 
 from fastdeploy.cache_manager.prefix_cache_manager import PrefixCacheManager
-from fastdeploy.config import CacheConfig, FDConfig, ParallelConfig
+from fastdeploy.config import CacheConfig, FDConfig, ParallelConfig, SchedulerConfig
 from fastdeploy.engine.args_utils import EngineArgs
 from fastdeploy.engine.request import Request
 
@@ -18,6 +18,7 @@ def test_normal_case():
     model_cfg.print = print
     cache_cfg.bytes_per_layer_per_block = 1
     parallel_cfg = ParallelConfig(args)
+    scheduler_cfg = SchedulerConfig(args)
     graph_opt_cfg = engine_args.create_graph_optimization_config()
     fd_config = FDConfig(
         model_config=model_cfg,
@@ -25,7 +26,7 @@ def test_normal_case():
         parallel_config=parallel_cfg,
         graph_opt_config=graph_opt_cfg,
         speculative_config=speculative_cfg,
-        max_num_batched_tokens=engine_args.max_num_batched_tokens,
+        scheduler_cfg=scheduler_cfg,
     )
     cache_manager = PrefixCacheManager(config=fd_config, tensor_parallel_size=8, splitwise_role="mixed")
     req1 = Request.from_dict({"request_id": "req1", "prompt_token_ids": [1] * 3200, "prompt_token_ids_len": 3200})
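In short, the batching limits that used to be passed to FDConfig directly (and held by ParallelConfig) now travel through SchedulerConfig. Below is a minimal sketch of the new wiring, following the test above. It assumes fastdeploy is installed; `args` is a placeholder for the argument namespace the real test builds from EngineArgs, and the omitted keyword arguments are unchanged from before this commit.

# Sketch only: mirrors the updated test_normal_case wiring from the diff above.
from types import SimpleNamespace

from fastdeploy.config import FDConfig, ParallelConfig, SchedulerConfig

args = SimpleNamespace()  # placeholder; the real test populates this via EngineArgs

parallel_cfg = ParallelConfig(args)    # no longer owns max_num_batched_tokens / max_num_seqs
scheduler_cfg = SchedulerConfig(args)  # the batching limits now live here
fd_config = FDConfig(
    parallel_config=parallel_cfg,
    scheduler_cfg=scheduler_cfg,       # replaces the old max_num_batched_tokens=... argument
    # ... model_config, cache_config, graph_opt_config, speculative_config as before
)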