[Speculative Decoding][MTP] Update extract_mtp_weight script and optimize config (#5183)

* update extract_mtp_model

* modify config usage
This commit is contained in:
freeliuzc
2025-11-25 14:09:03 +08:00
committed by GitHub
parent edf0d09257
commit 5c8c2d47eb
4 changed files with 43 additions and 6 deletions

View File

@@ -1628,10 +1628,6 @@ class FDConfig:
else:
self.scheduler_config.max_num_batched_tokens = self.model_config.max_model_len
self.scheduler_config.max_chunk_len = (
self.scheduler_config.max_num_batched_tokens + self.scheduler_config.max_extra_num_batched_tokens
)
if self.long_prefill_token_threshold == 0:
self.long_prefill_token_threshold = int(self.model_config.max_model_len * 0.04)