Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-04 08:16:42 +08:00)
[Fix] Fix empty prompt_token_ids, update the parser's triggering condit… (#2891)
@@ -111,6 +111,8 @@ class ErnieProcessor(BaseDataProcessor):
         else:
             request.prompt_token_ids = self.messages2ids(request.to_dict())
 
+        if len(request.prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if max_model_len is not None and len(request.prompt_token_ids) > max_model_len:
             request.prompt_token_ids = request.prompt_token_ids[: max_model_len - 1]
         if request.get("max_tokens") is None:
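The guard added here fails fast on an empty prompt instead of letting it propagate into truncation and max_tokens defaulting. A minimal, self-contained sketch of the same pattern; SimpleRequest is a hypothetical stand-in, not FastDeploy's request class:

# Sketch of the fail-fast guard, assuming an attribute-backed request object.
class SimpleRequest:
    def __init__(self, prompt_token_ids=None):
        self.prompt_token_ids = prompt_token_ids or []

def validate_prompt(request: SimpleRequest) -> None:
    # Reject empty prompts before any later processing, mirroring the
    # check introduced in this commit.
    if len(request.prompt_token_ids) == 0:
        raise ValueError(
            "Invalid input: prompt_token_ids must be a non-empty sequence of token IDs"
        )

validate_prompt(SimpleRequest([1, 2, 3]))  # passes silently
try:
    validate_prompt(SimpleRequest([]))     # raises ValueError
except ValueError as e:
    print(e)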
@@ -160,7 +162,9 @@ class ErnieProcessor(BaseDataProcessor):
             req_id = request.get("request_id", None)
             data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}")
         else:
-            request["prompt_token_ids"] = self.messages2ids(request)
+            request['prompt_token_ids'] = self.messages2ids(request)
+        if len(request['prompt_token_ids']) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
 
         # truncate prompts that exceed the length limit
         if max_model_len is not None and len(request["prompt_token_ids"]) > max_model_len:
@@ -184,7 +188,6 @@ class ErnieProcessor(BaseDataProcessor):
         Returns:
             Dict: response contain text fields
         """
 
         req_id = response_dict.request_id
         token_ids = response_dict.outputs.token_ids
-
@@ -228,6 +231,7 @@ class ErnieProcessor(BaseDataProcessor):
         Returns:
             Dict: response contain text fields
         """
+        enable_thinking = kwargs.get("enable_thinking")
         token_ids = response_dict["outputs"]["token_ids"]
         is_end = response_dict["finished"]
         req_id = response_dict["request_id"]
@@ -237,8 +241,9 @@ class ErnieProcessor(BaseDataProcessor):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            if self.reasoning_parser:
-                reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
+            if enable_thinking and self.reasoning_parser:
+                reasoning_content, text = self.reasoning_parser.extract_reasoning_content(
+                    full_text, response_dict)
                 response_dict["outputs"]["text"] = text
                 response_dict["outputs"]["reasoning_content"] = reasoning_content
             else:
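The change above gates reasoning extraction on both an explicit enable_thinking flag and a configured parser. A standalone sketch of that triggering condition; StubParser and its "</think>" split are hypothetical, and the else branch here simply passes the full text through, which the truncated hunk does not confirm:

# Sketch of the new triggering condition with a hypothetical parser.
class StubParser:
    def extract_reasoning_content(self, full_text, response):
        # Split reasoning from the final answer on a "</think>" marker.
        reasoning, sep, answer = full_text.partition("</think>")
        return (reasoning, answer) if sep else ("", full_text)

def finalize(full_text, response, parser, enable_thinking):
    # Run the parser only when thinking output was requested AND a
    # parser is configured, as in the condition added by this commit.
    if enable_thinking and parser:
        reasoning, text = parser.extract_reasoning_content(full_text, response)
        response["outputs"]["text"] = text
        response["outputs"]["reasoning_content"] = reasoning
    else:
        response["outputs"]["text"] = full_text
    return response

print(finalize("why</think>42", {"outputs": {}}, StubParser(), True))
print(finalize("why</think>42", {"outputs": {}}, StubParser(), False))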
@@ -27,6 +27,7 @@ from PIL import Image
 
 from fastdeploy.entrypoints.chat_utils import parse_chat_messages
 from fastdeploy.input.ernie_tokenizer import ErnieBotTokenizer
+from fastdeploy.utils import data_processor_logger
 
 from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor
 from .process_video import read_frames_decord, read_video_decord
@@ -252,6 +253,8 @@ class DataProcessor:
                 image_message_list.append(item)
 
         prompt_token_ids = self.apply_chat_template(request)
+        if len(prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         image_start_index = 0
         image_message_index = 0
         for i in range(len(prompt_token_ids)):
@@ -503,4 +506,6 @@ class DataProcessor:
         )
         tokens = self.tokenizer.tokenize(prompt_token_str)
         token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
+        data_processor_logger.info(
+            f"req_id:{request.get('request_id', '')}, tokens: {tokens}, token_ids: {token_ids}")
         return token_ids
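The logging added at the end of this helper records the tokenize/convert round trip per request. A standalone sketch of that flow; WhitespaceTokenizer is a hypothetical stand-in for ErnieBotTokenizer and the req_id is made up:

# Sketch of the tokenize -> convert_tokens_to_ids -> log flow.
import logging

logging.basicConfig(level=logging.INFO)
data_processor_logger = logging.getLogger("data_processor")

class WhitespaceTokenizer:
    def __init__(self):
        self.vocab = {}

    def tokenize(self, text):
        return text.split()

    def convert_tokens_to_ids(self, tokens):
        # Assign ids in first-seen order; repeated tokens reuse their id.
        return [self.vocab.setdefault(t, len(self.vocab)) for t in tokens]

tokenizer = WhitespaceTokenizer()
tokens = tokenizer.tokenize("hello world hello")
token_ids = tokenizer.convert_tokens_to_ids(tokens)
data_processor_logger.info(
    f"req_id:{'demo-req'}, tokens: {tokens}, token_ids: {token_ids}")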
@@ -239,7 +239,11 @@ class DataProcessor(BaseDataProcessor):
             task["enable_thinking"] = kwargs.get("enable_thinking", True)
             request.prompt_token_ids = self.messages2ids(task)
         else:
-            raise ValueError(f"The request should have `input_ids`, `text` or `messages`: {request}.")
+            raise ValueError(
+                f"The request should have `input_ids`, `text` or `messages`: {request}."
+            )
+        if len(request.prompt_token_ids) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if request.get("max_tokens") is None:
             request.set(
                 "max_tokens",
@@ -283,8 +287,11 @@ class DataProcessor(BaseDataProcessor):
                 raise ValueError("This model does not support chat_template.")
             request["prompt_token_ids"] = self.messages2ids(request)
         else:
-            raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}")
+            raise ValueError(
+                f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}"
+            )
+        if len(request['prompt_token_ids']) == 0:
+            raise ValueError("Invalid input: prompt_token_ids must be a non-empty sequence of token IDs")
         if request.get("max_tokens") is None:
             request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"]))
         if request.get("temperature") < _SAMPLING_EPS:
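The max_tokens default visible in this hunk derives the generation budget from whatever context remains after the prompt, floored at 1. A worked sketch of just that computation, with made-up lengths:

# Worked sketch of the max_tokens defaulting shown above.
def default_max_tokens(request: dict, max_model_len: int) -> dict:
    # When the caller sets no max_tokens, budget the remaining context
    # window, but never less than 1 so generation can emit a token.
    if request.get("max_tokens") is None:
        request["max_tokens"] = max(
            1, max_model_len - len(request["prompt_token_ids"]))
    return request

req = {"prompt_token_ids": [0] * 100}
print(default_max_tokens(req, 2048)["max_tokens"])   # 2048 - 100 = 1948
req = {"prompt_token_ids": [0] * 4096}
print(default_max_tokens(req, 2048)["max_tokens"])   # negative, floored to 1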
@@ -335,6 +342,7 @@ class DataProcessor(BaseDataProcessor):
         Returns:
             Dict: response contain text fields
         """
+        enable_thinking = kwargs.get("enable_thinking")
         token_ids = response_dict["outputs"]["token_ids"]
         is_end = response_dict["finished"]
         req_id = response_dict["request_id"]
@@ -344,8 +352,9 @@ class DataProcessor(BaseDataProcessor):
         delta_text, _, previous_texts = self.ids2tokens(token_ids, req_id)
         if is_end:
             full_text = previous_texts + delta_text
-            if self.reasoning_parser:
-                reasoning_content, text = self.reasoning_parser.extract_reasoning_content(full_text, response_dict)
+            if enable_thinking and self.reasoning_parser:
+                reasoning_content, text = self.reasoning_parser.extract_reasoning_content(
+                    full_text, response_dict)
                 response_dict["outputs"]["text"] = text
                 response_dict["outputs"]["reasoning_content"] = reasoning_content
             else: