[FDConfig] Remove max_num_batched_tokens/max_num_seqs in parallel config (#4116)

* remove max_num_batched_tokens in parallel config

* remove max_num_seqs

* update test case

* fix test

* fix

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Author: YuanRisheng
Date: 2025-09-17 10:43:35 +08:00
Committed by: GitHub
Commit: 2e9e53ff7e (parent: c01a756912)
30 changed files with 169 additions and 131 deletions


@@ -943,23 +943,15 @@ class EngineArgs:
         """
         prefix = "scheduler_"
         prefix_len = len(prefix)
-        extra_params = [
-            "max_model_len",
-            "enable_chunked_prefill",
-            "max_num_partial_prefills",
-            "max_long_partial_prefills",
-            "long_prefill_token_threshold",
-        ]
         all = asdict(self)
         params = dict()
         for k, v in all.items():
             if k[:prefix_len] == prefix:
                 params[k[prefix_len:]] = v
-            elif k in extra_params:
+            else:
                 params[k] = v
-        return SchedulerConfig(**params)
+        return SchedulerConfig(params)

     def create_graph_optimization_config(self) -> GraphOptimizationConfig:
         """
@@ -1059,9 +1051,7 @@ class EngineArgs:
             load_config=load_cfg,
             parallel_config=parallel_cfg,
             max_model_len=self.max_model_len,
-            max_num_seqs=self.max_num_seqs,
             speculative_config=speculative_cfg,
-            max_num_batched_tokens=self.max_num_batched_tokens,
             ips=self.ips,
             use_warmup=self.use_warmup,
             engine_worker_queue_port=self.engine_worker_queue_port,
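
Taken together with the first hunk, the net effect is that the batching limits stop being top-level FDConfig arguments (and, per the PR title, leave the parallel config) and are owned by the scheduler config instead. The following sketch shows the resulting ownership using hypothetical dataclasses, not FastDeploy's actual class definitions:

    from dataclasses import dataclass, field

    @dataclass
    class SchedulerConfig:
        # Hypothetical fields/defaults; the real ones live in FastDeploy.
        max_num_seqs: int = 256
        max_num_batched_tokens: int = 2048

    @dataclass
    class ParallelConfig:
        # Only parallelism concerns remain here after the cleanup.
        tensor_parallel_size: int = 1

    @dataclass
    class FDConfig:
        scheduler_config: SchedulerConfig = field(default_factory=SchedulerConfig)
        parallel_config: ParallelConfig = field(default_factory=ParallelConfig)

    cfg = FDConfig()
    # Callers now read the batching limits from the scheduler config:
    print(cfg.scheduler_config.max_num_seqs)
    print(cfg.scheduler_config.max_num_batched_tokens)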