[Speculative Decoding][MTP] Update extract_mtp_weight script and optimize config (#5183)

* Update extract_mtp_model
* Modify config usage
2025-12-24 13:28:13 +08:00 · 2025-11-25 14:09:03 +08:00
parent edf0d09257
commit 5c8c2d47eb
4 changed files with 43 additions and 6 deletions
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -1628,10 +1628,6 @@ class FDConfig:
                else:
                    self.scheduler_config.max_num_batched_tokens = self.model_config.max_model_len

-        self.scheduler_config.max_chunk_len = (
-            self.scheduler_config.max_num_batched_tokens + self.scheduler_config.max_extra_num_batched_tokens
-        )
-
        if self.long_prefill_token_threshold == 0:
            self.long_prefill_token_threshold = int(self.model_config.max_model_len * 0.04)