Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-11-02 12:44:20 +08:00.
[fix] Modify follow-up push parameters and modify the verification method for thinking length (#4177)
* [fix]Modify follow-up push parameters and Modify the verification method for thinking length (#4086) * 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式 * 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式 * 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式 * 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式 * add completion_token_ids * add logger * fix reasoning_max_tokens ParameterError * add unittest * add unittest * add unittest * add unittest * add unittest * add unit test * fix
This commit is contained in:
@@ -255,8 +255,13 @@ class EngineClient:
             raise ValueError(f"max_tokens can be defined [1, {self.max_model_len}).")

         if data.get("reasoning_max_tokens") is not None:
-            if data["reasoning_max_tokens"] > data["max_tokens"] or data["reasoning_max_tokens"] < 1:
-                raise ValueError("reasoning_max_tokens must be between max_tokens and 1")
+            if data["reasoning_max_tokens"] < 1:
+                raise ValueError("reasoning_max_tokens must be greater than 1")
+            if data["reasoning_max_tokens"] > data["max_tokens"]:
+                data["reasoning_max_tokens"] = data["max_tokens"]
+                api_server_logger.warning(
+                    f"req_id: {data['request_id']}, reasoning_max_tokens exceeds max_tokens, the value of reasoning_max_tokens will be adjusted to match that of max_tokens"
+                )

         if data.get("top_p") is not None:
             if data["top_p"] > 1 or data["top_p"] < 0:
@@ -588,6 +588,7 @@ class ChatCompletionRequest(BaseModel):
     prompt_token_ids: Optional[List[int]] = None
     max_streaming_response_tokens: Optional[int] = None
     disable_chat_template: Optional[bool] = False
+    completion_token_ids: Optional[List[int]] = None
     # doc: end-chat-completion-extra-params

     def to_dict_for_infer(self, request_id=None):
@@ -613,6 +614,9 @@ class ChatCompletionRequest(BaseModel):
             ), "The parameter `raw_request` is not supported now, please use completion api instead."
             for key, value in self.metadata.items():
                 req_dict[key] = value
+            from fastdeploy.utils import api_server_logger
+
+            api_server_logger.warning("The parameter metadata is obsolete.")
         for key, value in self.dict().items():
             if value is not None:
                 req_dict[key] = value
Reference in new issue · Block a user