[Speculative Decoding][MTP] Update extract_mtp_weight script and optimize config (#5183)

* update extract_mtp_model

* modify config usage
This commit is contained in:
freeliuzc
2025-11-25 14:09:03 +08:00
committed by GitHub
parent edf0d09257
commit 5c8c2d47eb
4 changed files with 43 additions and 6 deletions

View File

@@ -270,7 +270,6 @@ class SchedulerConfig:
self.name = "local" # "local" for LocalScheduler or "global" for GlobalScheduler
self.max_num_batched_tokens = 2048 # base token_num for text inputs
self.max_extra_num_batched_tokens = 16384 # extra token_num for multimodal inputs
self.max_chunk_len = 18432 # max supported token_num = max_num_batched_tokens + max_extra_num_batched_tokens
self.max_num_seqs = 34
self.splitwise_role = "mixed"
self.config = None