[FDConfig]Remove max_num_batched_tokens/max_num_seqs in parallel config (#4116)

* remove max_num_batched_tokens in parallel config

* remove max_num_seqs

* update test case

* fix test

* fix

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Author: YuanRisheng
Date: 2025-09-17 10:43:35 +08:00
Committed by: GitHub
Parent: c01a756912
Commit: 2e9e53ff7e
30 changed files with 169 additions and 131 deletions
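
The practical effect on callers is easiest to see side by side. The sketch below condenses the test changes in this commit into a before/after view of the FDConfig construction; model_cfg, speculative_cfg, graph_opt_cfg, and args stand for objects the tests already build, and any FDConfig keyword not visible in the hunks is omitted, so treat it as an illustration rather than a complete call.

# Before this commit: FDConfig took the batching limit directly,
# and max_num_batched_tokens/max_num_seqs lived on the parallel config.
fd_config = FDConfig(
    model_config=model_cfg,
    parallel_config=ParallelConfig(args),
    speculative_config=speculative_cfg,
    graph_opt_config=graph_opt_cfg,
    max_num_batched_tokens=engine_args.max_num_batched_tokens,
)

# After this commit: the limits travel with SchedulerConfig,
# which is passed to FDConfig as scheduler_config.
scheduler_cfg = SchedulerConfig(args)
fd_config = FDConfig(
    model_config=model_cfg,
    parallel_config=ParallelConfig(args),
    speculative_config=speculative_cfg,
    graph_opt_config=graph_opt_cfg,
    scheduler_config=scheduler_cfg,
)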


@@ -1,7 +1,7 @@
 from dataclasses import asdict
 from types import SimpleNamespace
-from fastdeploy.config import CacheConfig, FDConfig, ParallelConfig
+from fastdeploy.config import CacheConfig, FDConfig, ParallelConfig, SchedulerConfig
 from fastdeploy.engine.args_utils import EngineArgs
 from fastdeploy.engine.request import Request
 from fastdeploy.engine.sched.resource_manager_v1 import ResourceManagerV1
@@ -17,6 +17,7 @@ def test_normal_schedule():
     model_cfg.print = print
     cache_cfg.bytes_per_layer_per_block = 1
     parallel_cfg = ParallelConfig(args)
+    scheduler_cfg = SchedulerConfig(args)
     graph_opt_cfg = engine_args.create_graph_optimization_config()
     fd_config = FDConfig(
         model_config=model_cfg,
@@ -24,7 +25,7 @@ def test_normal_schedule():
         parallel_config=parallel_cfg,
         speculative_config=speculative_cfg,
         graph_opt_config=graph_opt_cfg,
-        max_num_batched_tokens=engine_args.max_num_batched_tokens,
+        scheduler_config=scheduler_cfg,
     )
     resource_manager_v1 = ResourceManagerV1(
         max_num_seqs=max_num_seqs, config=fd_config, tensor_parallel_size=8, splitwise_role="mixed"
@@ -80,6 +81,7 @@ def test_preempted_request():
     model_cfg.print = print
     cache_cfg.bytes_per_layer_per_block = 1
     parallel_cfg = ParallelConfig(args)
+    scheduler_cfg = SchedulerConfig(args)
     graph_opt_cfg = engine_args.create_graph_optimization_config()
     fd_config = FDConfig(
         model_config=model_cfg,
@@ -87,7 +89,7 @@ def test_preempted_request():
         parallel_config=parallel_cfg,
         graph_opt_config=graph_opt_cfg,
         speculative_config=speculative_cfg,
-        max_num_batched_tokens=engine_args.max_num_batched_tokens,
+        scheduler_config=scheduler_cfg,
     )
     resource_manager_v1 = ResourceManagerV1(
         max_num_seqs=max_num_seqs, config=fd_config, tensor_parallel_size=8, splitwise_role="mixed"