[Feature] Add temp_scaled_logprobs and top_p_normalized_logprobs parameters for logits and logprobs post processing (#3536)

* [feature] Add temp_scaled_logprobs and top_p_normalized_logprobs parameters for logits and logprobs post processing

* Inference engine: support temp_scaled_logprobs and top_p_normalized_logprobs

* code check

* code check

* fix tokenizer.decoder(-1), return 'Invalid Token'

* check seq len time shape

* clip inf values in logprobs

* code check

---------

Co-authored-by: sunlei1024 <sunlei5788@gmail.com>
This commit is contained in:
chen
2025-08-25 14:11:18 +08:00
committed by GitHub
parent b7890cbe8d
commit 2136990144
5 changed files with 84 additions and 4 deletions

View File

@@ -333,6 +333,9 @@ class CompletionRequest(BaseModel):
echo: Optional[bool] = False
frequency_penalty: Optional[float] = None
logprobs: Optional[int] = None
# For logits and logprobs post-processing
temp_scaled_logprobs: bool = False
top_p_normalized_logprobs: bool = False
max_tokens: Optional[int] = None
n: int = 1
presence_penalty: Optional[float] = None
@@ -461,6 +464,11 @@ class ChatCompletionRequest(BaseModel):
frequency_penalty: Optional[float] = None
logprobs: Optional[bool] = False
top_logprobs: Optional[int] = 0
# For logits and logprobs post-processing
temp_scaled_logprobs: bool = False
top_p_normalized_logprobs: bool = False
# remove max_tokens when field is removed from OpenAI API
max_tokens: Optional[int] = Field(
default=None,
@@ -515,6 +523,8 @@ class ChatCompletionRequest(BaseModel):
req_dict["max_tokens"] = self.max_completion_tokens or self.max_tokens
req_dict["logprobs"] = self.top_logprobs if self.logprobs else None
req_dict["temp_scaled_logprobs"] = self.temp_scaled_logprobs
req_dict["top_p_normalized_logprobs"] = self.top_p_normalized_logprobs
# parse request model into dict, priority: request params > metadata params
if self.metadata is not None: