mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[MTP] optimize mtp infer speed (#2840)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
This commit is contained in:
@@ -123,7 +123,7 @@ def apply_speculative_penalty_multi_scores(
|
||||
from fastdeploy.model_executor.ops.gpu import \
|
||||
speculate_get_token_penalty_multi_scores
|
||||
|
||||
logits = speculate_get_token_penalty_multi_scores(
|
||||
speculate_get_token_penalty_multi_scores(
|
||||
pre_token_ids,
|
||||
logits,
|
||||
repetition_penalties,
|
||||
@@ -141,5 +141,5 @@ def apply_speculative_penalty_multi_scores(
|
||||
)
|
||||
else:
|
||||
raise NotImplementedError()
|
||||
|
||||
# inplace
|
||||
return logits
|
||||
|
||||
Reference in New Issue
Block a user