[Feature] Pass through the chat_template_kwargs to the data processing module (#3421) (#3469)

* fix chat_template_args

* fix args

* add offline

* add offline

* fix

* fix

* fix default enable_thinking value

* fix default enable_thinking value

* modify condition

* Revert "modify condition"

This reverts commit 26430bdeb1.

* fix unit test
This commit is contained in:
luukunn
2025-08-19 17:40:12 +08:00
committed by GitHub
parent 3ffbc98179
commit d07338f932
6 changed files with 50 additions and 13 deletions

View File

@@ -222,7 +222,6 @@ class DataProcessor(BaseDataProcessor):
request = self._apply_default_parameters(request)
if request.get("eos_token_ids") is None or len(request.eos_token_ids) == 0:
request.eos_token_ids = self.eos_token_ids
stop_sequences = request.get("stop", [])
if stop_sequences is not None and len(stop_sequences) != 0:
stop_seqs, stop_seqs_len = self.update_stop_seq(stop_sequences)
@@ -236,7 +235,15 @@ class DataProcessor(BaseDataProcessor):
if self.tokenizer.chat_template is None:
raise ValueError("This model does not support chat_template.")
task = request.to_dict()
task["enable_thinking"] = kwargs.get("enable_thinking", True)
chat_template_kwargs = kwargs.get("chat_template_kwargs")
if chat_template_kwargs:
if isinstance(chat_template_kwargs, dict):
for k, v in chat_template_kwargs.items():
if k not in task:
task[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
task.setdefault("enable_thinking", True)
request.prompt_token_ids = self.messages2ids(task)
else:
raise ValueError(f"The request should have `input_ids`, `text` or `messages`: {request}.")
@@ -286,6 +293,15 @@ class DataProcessor(BaseDataProcessor):
elif "messages" in request:
if self.tokenizer.chat_template is None:
raise ValueError("This model does not support chat_template.")
chat_template_kwargs = request.get("chat_template_kwargs")
if chat_template_kwargs:
if isinstance(chat_template_kwargs, dict):
for k, v in chat_template_kwargs.items():
if k not in request:
request[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
request.setdefault("enable_thinking", True)
request["prompt_token_ids"] = self.messages2ids(request)
else:
raise ValueError(f"Request must contain 'prompt_token_ids', 'prompt', or 'messages': {request}")