Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-07 09:31:35 +08:00)
[FDConfig]Remove max_num_batched_tokens/max_num_seqs in parallel config (#4116)
* remove max_num_batched_tokens in parallel config
* remove max_num_seqs
* update test case
* fix test
* fix

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
@@ -1,7 +1,7 @@
 from dataclasses import asdict
 from types import SimpleNamespace
 
-from fastdeploy.config import CacheConfig, FDConfig, ParallelConfig
+from fastdeploy.config import CacheConfig, FDConfig, ParallelConfig, SchedulerConfig
 from fastdeploy.engine.args_utils import EngineArgs
 from fastdeploy.engine.request import Request
 from fastdeploy.engine.sched.resource_manager_v1 import ResourceManagerV1
@@ -17,6 +17,7 @@ def test_normal_schedule():
     model_cfg.print = print
     cache_cfg.bytes_per_layer_per_block = 1
     parallel_cfg = ParallelConfig(args)
+    scheduler_cfg = SchedulerConfig(args)
     graph_opt_cfg = engine_args.create_graph_optimization_config()
     fd_config = FDConfig(
         model_config=model_cfg,
@@ -24,7 +25,7 @@ def test_normal_schedule():
         parallel_config=parallel_cfg,
         speculative_config=speculative_cfg,
         graph_opt_config=graph_opt_cfg,
-        max_num_batched_tokens=engine_args.max_num_batched_tokens,
+        scheduler_config=scheduler_cfg,
     )
     resource_manager_v1 = ResourceManagerV1(
         max_num_seqs=max_num_seqs, config=fd_config, tensor_parallel_size=8, splitwise_role="mixed"
@@ -80,6 +81,7 @@ def test_preempted_request():
     model_cfg.print = print
     cache_cfg.bytes_per_layer_per_block = 1
     parallel_cfg = ParallelConfig(args)
+    scheduler_cfg = SchedulerConfig(args)
     graph_opt_cfg = engine_args.create_graph_optimization_config()
     fd_config = FDConfig(
         model_config=model_cfg,
@@ -87,7 +89,7 @@ def test_preempted_request():
         parallel_config=parallel_cfg,
         graph_opt_config=graph_opt_cfg,
         speculative_config=speculative_cfg,
-        max_num_batched_tokens=engine_args.max_num_batched_tokens,
+        scheduler_config=scheduler_cfg,
     )
     resource_manager_v1 = ResourceManagerV1(
         max_num_seqs=max_num_seqs, config=fd_config, tensor_parallel_size=8, splitwise_role="mixed"
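Taken together, the hunks above change how the tests wire scheduling limits into FDConfig: the limits now travel through SchedulerConfig(args) and the scheduler_config= argument instead of being passed to FDConfig as max_num_batched_tokens or kept on ParallelConfig. The sketch below is a hypothetical helper (build_fd_config_and_manager is not part of the PR) that pulls the new wiring into one place; it assumes args, engine_args, model_cfg, speculative_cfg, and max_num_seqs are prepared exactly as in the test's existing setup, which is not shown in these hunks.

from fastdeploy.config import FDConfig, ParallelConfig, SchedulerConfig
from fastdeploy.engine.sched.resource_manager_v1 import ResourceManagerV1


def build_fd_config_and_manager(args, engine_args, model_cfg, speculative_cfg, max_num_seqs):
    """Sketch of the post-#4116 wiring used by the test (assumed setup passed in)."""
    parallel_cfg = ParallelConfig(args)      # no longer carries max_num_batched_tokens/max_num_seqs
    scheduler_cfg = SchedulerConfig(args)    # scheduling limits are read from args here instead
    graph_opt_cfg = engine_args.create_graph_optimization_config()

    fd_config = FDConfig(
        model_config=model_cfg,
        # ... other FDConfig arguments from the test (e.g. the cache config) elided ...
        parallel_config=parallel_cfg,
        speculative_config=speculative_cfg,
        graph_opt_config=graph_opt_cfg,
        scheduler_config=scheduler_cfg,      # replaces max_num_batched_tokens=engine_args.max_num_batched_tokens
    )
    resource_manager_v1 = ResourceManagerV1(
        max_num_seqs=max_num_seqs, config=fd_config, tensor_parallel_size=8, splitwise_role="mixed"
    )
    return fd_config, resource_manager_v1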