[Feature][MTP] Support a new speculative decoding method named hybrid MTP with ngram (#3610)

This commit is contained in:
freeliuzc
2025-08-26 14:29:22 +08:00
committed by GitHub
parent 0a0d2959b9
commit 52eda7fdb3
20 changed files with 454 additions and 571 deletions

View File

@@ -45,6 +45,10 @@ class Proposer(ABC):
self.max_model_len = self.parallel_config.max_model_len
self.speculative_method = self.speculative_config.method
self.max_draft_token_num = self.speculative_config.num_speculative_tokens
self.num_model_steps = self.speculative_config.num_model_steps
self.max_ngram_size = self.speculative_config.max_ngram_size
self.min_ngram_size = self.speculative_config.min_ngram_size
spec_logger.info(f"Speculate config: {self.speculative_config}")