[fix] Update apply_chat_template (#4137)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Publish Job / publish_pre_check (push) Has been cancelled
Publish Job / print_publish_pre_check_outputs (push) Has been cancelled
Publish Job / FD-Clone-Linux (push) Has been cancelled
Publish Job / Show Code Archive Output (push) Has been cancelled
Publish Job / BUILD_SM8090 (push) Has been cancelled
Publish Job / BUILD_SM8689 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8090 (push) Has been cancelled
Publish Job / PADDLE_PYPI_UPLOAD_8689 (push) Has been cancelled
Publish Job / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
Publish Job / Run FastDeploy LogProb Tests (push) Has been cancelled
Publish Job / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
Publish Job / Run Base Tests (push) Has been cancelled
Publish Job / Run Accuracy Tests (push) Has been cancelled
Publish Job / Run Stable Tests (push) Has been cancelled
CI Images Build / FD-Clone-Linux (push) Has been cancelled
CI Images Build / Show Code Archive Output (push) Has been cancelled
CI Images Build / CI Images Build (push) Has been cancelled
CI Images Build / BUILD_SM8090 (push) Has been cancelled
CI Images Build / Run FastDeploy Unit Tests and Coverage (push) Has been cancelled
CI Images Build / Run FastDeploy LogProb Tests (push) Has been cancelled
CI Images Build / Extracted partial CE model tasks to run in CI. (push) Has been cancelled
CI Images Build / Run Base Tests (push) Has been cancelled
CI Images Build / Run Accuracy Tests (push) Has been cancelled
CI Images Build / Run Stable Tests (push) Has been cancelled
CI Images Build / Publish Docker Images Pre Check (push) Has been cancelled

* update apply_chat_template

* fix unittest

* fix unittest

* fix

* fix

* fix unit test

* fix

* fix unit test

* add unit test
This commit is contained in:
luukunn
2025-09-24 18:56:32 +08:00
committed by GitHub
parent 7c1fd19f0f
commit 18f4977aec
10 changed files with 146 additions and 109 deletions

View File

@@ -88,7 +88,6 @@ class Ernie4_5Processor(BaseDataProcessor):
str: error message
"""
data_processor_logger.info(f"Start processing request: {request}")
request.chat_template = kwargs.get("chat_template")
request = self._apply_default_parameters(request)
if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0:
request.eos_token_ids = self.eos_token_ids
@@ -127,7 +126,7 @@ class Ernie4_5Processor(BaseDataProcessor):
)
elif request.messages is not None:
task = request.to_dict()
chat_template_kwargs = kwargs.get("chat_template_kwargs")
chat_template_kwargs = kwargs.get("chat_template_kwargs", {})
if chat_template_kwargs:
if isinstance(chat_template_kwargs, dict):
for k, v in chat_template_kwargs.items():
@@ -135,7 +134,7 @@ class Ernie4_5Processor(BaseDataProcessor):
task[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
request.prompt_token_ids = self.messages2ids(task)
request.prompt_token_ids = self.messages2ids(task, **chat_template_kwargs)
else:
raise ValueError(f"The request should have `prompt_token_ids`, `prompt` or `messages`: {request}.")
@@ -205,7 +204,7 @@ class Ernie4_5Processor(BaseDataProcessor):
req_id = request.get("request_id", None)
data_processor_logger.info(f"req_id:{req_id}, tokens:{tokens}, token_ids: {token_ids}")
elif request.get("messages"):
chat_template_kwargs = request.get("chat_template_kwargs")
chat_template_kwargs = request.get("chat_template_kwargs", {})
if chat_template_kwargs:
if isinstance(chat_template_kwargs, dict):
for k, v in chat_template_kwargs.items():
@@ -213,7 +212,7 @@ class Ernie4_5Processor(BaseDataProcessor):
request[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
request["prompt_token_ids"] = self.messages2ids(request)
request["prompt_token_ids"] = self.messages2ids(request, **chat_template_kwargs)
else:
raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}")
@@ -379,7 +378,7 @@ class Ernie4_5Processor(BaseDataProcessor):
del self.tool_parser_dict[req_id]
return response_dict
def messages2ids(self, request_or_messages):
def messages2ids(self, request_or_messages, **kwargs):
"""
Convert multi-turn messages into ID sequences.
@@ -397,7 +396,7 @@ class Ernie4_5Processor(BaseDataProcessor):
tokenize=False,
split_special_tokens=False,
add_special_tokens=False,
chat_template=request_or_messages.get("chat_template", None),
**kwargs,
)
request_or_messages["text_after_process"] = spliced_message
req_id = None