[MTP] optimize mtp infer speed (#2840)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

This commit is contained in:
freeliuzc
2025-07-14 19:50:22 +08:00
committed by GitHub
parent 4c7b8bc458
commit 7cdd8d290d
6 changed files with 253 additions and 24 deletions

View File

@@ -123,7 +123,7 @@ def apply_speculative_penalty_multi_scores(
from fastdeploy.model_executor.ops.gpu import \
speculate_get_token_penalty_multi_scores
-logits = speculate_get_token_penalty_multi_scores(
+speculate_get_token_penalty_multi_scores(
pre_token_ids,
logits,
repetition_penalties,
@@ -141,5 +141,5 @@ def apply_speculative_penalty_multi_scores(
)
else:
raise NotImplementedError()
+# inplace
return logits