mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[fix] Rename the continuation parameter generated_token_ids to completion_token_ids and change how the thinking length is validated (#4086)
* 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式
* 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式
* 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式
* 续推参数 generated_token_ids 修改成 completion_token_ids;修改思考长度校验方式
* add completion_token_ids
* add logger
* fix reasoning_max_tokens ParameterError
* add unittest
* add unittest
* add unittest
* add unittest
* add unittest
* add unit test
This commit is contained in:
@@ -245,15 +245,11 @@ class QwenVLProcessor(TextProcessor):
|
||||
else:
|
||||
raise ValueError(f"Request must contain 'prompt', or 'messages': {request}")
|
||||
|
||||
metadata = request.get("metadata")
|
||||
# Handle continuation of previous generation by appending existing tokens
|
||||
if metadata and metadata.get("generated_token_ids"):
|
||||
self.append_generated_tokens(outputs, metadata["generated_token_ids"])
|
||||
if request.get("completion_token_ids"):
|
||||
self.append_completion_tokens(outputs, request["completion_token_ids"])
|
||||
|
||||
enable_thinking = False
|
||||
if metadata:
|
||||
enable_thinking = metadata.get("enable_thinking", False)
|
||||
|
||||
if request.get("chat_template_kwargs"):
|
||||
chat_template_kwargs = request.get("chat_template_kwargs")
|
||||
enable_thinking = chat_template_kwargs.get("enable_thinking", False)
|
||||
@@ -278,16 +274,16 @@ class QwenVLProcessor(TextProcessor):
|
||||
|
||||
return request
|
||||
|
||||
def append_generated_tokens(self, outputs, generated_token_ids):
|
||||
def append_completion_tokens(self, outputs, completion_token_ids):
|
||||
"""
|
||||
Append generated tokens to existing outputs.
|
||||
Append completion tokens to existing outputs.
|
||||
|
||||
Args:
|
||||
outputs: Current model outputs
|
||||
generated_token_ids: Generated tokens to append
|
||||
completion_token_ids: completion tokens to append
|
||||
"""
|
||||
out = {"input_ids": [], "token_type_ids": [], "position_ids": [], "cur_position": outputs["cur_position"]}
|
||||
self.processor._add_text(generated_token_ids, out)
|
||||
self.processor._add_text(completion_token_ids, out)
|
||||
|
||||
outputs["input_ids"] = np.concatenate(
|
||||
[outputs["input_ids"], np.array(out["input_ids"], dtype=np.int64)], axis=0
|
||||
|
||||
Reference in New Issue
Block a user