[Feature] support prompt repetition_penalty (#2806)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

This commit is contained in:
ming1753
2025-07-17 12:05:52 +08:00
committed by GitHub
parent 7dfd2ea052
commit 1f15ca21e4
8 changed files with 305 additions and 64 deletions

View File

@@ -43,3 +43,5 @@ class SamplingMetadata:
top_p: paddle.Tensor
top_k: Optional[paddle.Tensor] = None
max_num_logprobs: Optional[int] = None
prompt_ids: Optional[paddle.Tensor] = None
prompt_lens: Optional[paddle.Tensor] = None

View File

@@ -21,6 +21,8 @@ from fastdeploy.platforms import current_platform
def apply_penalty_multi_scores(
pre_token_ids: paddle.Tensor,
prompt_ids: paddle.Tensor,
prompt_lens: paddle.Tensor,
logits: paddle.Tensor,
repetition_penalties: paddle.Tensor,
frequency_penalties: paddle.Tensor,
@@ -39,6 +41,8 @@ def apply_penalty_multi_scores(
get_token_penalty_multi_scores
logits = get_token_penalty_multi_scores(
pre_token_ids,
prompt_ids,
prompt_lens,
logits,
repetition_penalties,
frequency_penalties,
@@ -69,6 +73,8 @@ def apply_penalty_multi_scores(
get_token_penalty_multi_scores
logits = get_token_penalty_multi_scores(
pre_token_ids,
prompt_ids,
prompt_lens,
logits,
repetition_penalties,
frequency_penalties,

View File

@@ -253,6 +253,8 @@ class Sampler(nn.Layer):
logits = apply_penalty_multi_scores(
sampling_metadata.pre_token_ids,
sampling_metadata.prompt_ids,
sampling_metadata.prompt_lens,
logits,
sampling_metadata.repetition_penalties,
sampling_metadata.frequency_penalties,