[Feature][MTP] Support new speculative decoding method named hybrid MTP with ngram (#3610)

This commit is contained in:
freeliuzc
2025-08-26 14:29:22 +08:00
committed by GitHub
parent 0a0d2959b9
commit 52eda7fdb3
20 changed files with 454 additions and 571 deletions

View File

@@ -252,12 +252,13 @@ class TokenProcessor:
def _compute_speculative_status(self):
# TODO(liuzichang): Supplement more statistics
interval = 50
interval = 10
if self.speculative_stats_step % interval == 0:
accept_ratio = 1 - self.total_step * 1.0 / self.number_of_output_tokens
spec_logger.info(
f"Speculate global accept ratio(Accept draft_tokens/Generated tokens): {accept_ratio}"
f" total step: {self.total_step}. total output token num: {self.number_of_output_tokens}"
f" avarage accept len: {self.number_of_output_tokens / self.total_step}"
)
if self.cfg.speculative_config.method in ["mtp"]: