mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 16:22:57 +08:00
[Feature][MTP]support new speculative decoding method named hybrid mtp with ngram (#3610)
This commit is contained in:
@@ -252,12 +252,13 @@ class TokenProcessor:
|
||||
|
||||
def _compute_speculative_status(self):
|
||||
# TODO(liuzichang): Supplement more statistics
|
||||
interval = 50
|
||||
interval = 10
|
||||
if self.speculative_stats_step % interval == 0:
|
||||
accept_ratio = 1 - self.total_step * 1.0 / self.number_of_output_tokens
|
||||
spec_logger.info(
|
||||
f"Speculate global accept ratio(Accept draft_tokens/Generated tokens): {accept_ratio}"
|
||||
f" total step: {self.total_step}. total output token num: {self.number_of_output_tokens}"
|
||||
f" avarage accept len: {self.number_of_output_tokens / self.total_step}"
|
||||
)
|
||||
|
||||
if self.cfg.speculative_config.method in ["mtp"]:
|
||||
|
Reference in New Issue
Block a user