【DataProcessor】add options thinking_mode (#4735)

* add thinking_mode

* add thinking_mode

* add thinking_mode

* add thinking_mode

* add thinking_mode

* add thinking_mode

* add unit test
This commit is contained in:
luukunn
2025-11-03 14:30:07 +08:00
committed by GitHub
parent 377f3bf5f2
commit 7b35488779
5 changed files with 215 additions and 6 deletions

View File

@@ -200,9 +200,7 @@ class OpenAIServingChat:
max_streaming_response_tokens = max(1, max_streaming_response_tokens)
enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
if enable_thinking is None:
enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
enable_thinking = self._get_thinking_status(request)
include_stop_str_in_output = request.include_stop_str_in_output
@@ -461,9 +459,7 @@ class OpenAIServingChat:
"""
created_time = int(time.time())
num_choices = 1 if request.n is None else request.n
enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
if enable_thinking is None:
enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
enable_thinking = self._get_thinking_status(request)
include_stop_str_in_output = request.include_stop_str_in_output
try:
@@ -750,3 +746,20 @@ class OpenAIServingChat:
error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}"
api_server_logger.error(error_msg)
return None
def _get_thinking_status(self, request: ChatCompletionRequest) -> bool:
"""
Get the thinking status from the request.
"""
enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
if enable_thinking is None:
enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
options = request.chat_template_kwargs.get("options") if request.chat_template_kwargs else None
if options:
thinking_mode = options.get("thinking_mode")
if thinking_mode:
if thinking_mode == "close" or thinking_mode == "false":
enable_thinking = False
else:
enable_thinking = True
return enable_thinking

View File

@@ -237,6 +237,14 @@ class Ernie4_5_VLProcessor(Ernie4_5Processor):
request[k] = v
else:
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
options = chat_template_kwargs.get("options")
if options:
thinking_mode = options.get("thinking_mode")
if thinking_mode:
if thinking_mode == "close" or thinking_mode == "false":
request["enable_thinking"] = False
else:
request["enable_thinking"] = True
request.setdefault("enable_thinking", True)
outputs = self.ernie4_5_processor.request2ids(request)
else:

View File

@@ -147,6 +147,7 @@ class DataProcessor:
"user": "User: ",
"bot": "Assistant: ",
"assistant": "Assistant: ",
"tool": "Tool: ",
}
def _build_token_type_mapping(self) -> Dict[Any, int]:

View File

@@ -0,0 +1,71 @@
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
import unittest
from unittest.mock import MagicMock
from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest
from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
class TestOpenAIServingCompletion(unittest.TestCase):
    def setUp(self):
        """
        Create an OpenAIServingChat instance backed by a mocked engine client.
        """
        self.mock_engine = MagicMock()
        self.chat_completion_handler = OpenAIServingChat(
            self.mock_engine,
            models=None,
            pid=123,
            ips=None,
            max_waiting_time=10,
            chat_template=None,
        )

    def test_enable_thinking(self):
        """_get_thinking_status honours enable_thinking and options.thinking_mode."""
        # (chat_template_kwargs, expected enable_thinking)
        cases = [
            ({}, None),
            ({"enable_thinking": True}, True),
            ({"enable_thinking": False}, False),
            ({"options": {"thinking_mode": "close"}}, False),
            ({"options": {"thinking_mode": "false"}}, False),
            ({"options": {"thinking_mode": "open"}}, True),
            ({"options": {"thinking_mode": "123"}}, True),
        ]
        for kwargs, expected in cases:
            with self.subTest(chat_template_kwargs=kwargs):
                request = ChatCompletionRequest(messages=[], chat_template_kwargs=kwargs)
                enable_thinking = self.chat_completion_handler._get_thinking_status(request)
                self.assertEqual(enable_thinking, expected)


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,116 @@
import unittest
from unittest.mock import MagicMock, patch
from fastdeploy.input.ernie4_5_vl_processor import Ernie4_5_VLProcessor
class TestErnie4_5_vl_ProcessorProcessResponseDictStreaming(unittest.TestCase):
    def setUp(self):
        """Build an Ernie4_5_VLProcessor whose heavy dependencies are all mocked."""
        # Instantiate without running the real __init__ (it would load a model).
        with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None) as mock_init:
            self.processor = Ernie4_5_VLProcessor("model_path")
            mock_init.side_effect = lambda *args, **kwargs: print(f"__init__ called with {args}, {kwargs}")

        # Minimal attribute surface the production code touches.
        tokenizer = MagicMock()
        tokenizer.eos_token_id = 1
        self.processor.tokenizer = tokenizer
        self.processor.decode_status = {}
        self.processor.reasoning_end_dict = {}
        self.processor.tool_parser_dict = {}
        self.processor.generation_config = MagicMock()
        self.processor.eos_token_ids = [1]
        self.processor.reasoning_parser = MagicMock()
        self.processor._check_mm_limits = MagicMock()
        self.processor.ernie4_5_processor = MagicMock()
        self.processor.pack_outputs = MagicMock()

        # Stub token decoding: record a decode status and return fixed deltas.
        def fake_ids2tokens(token_ids, task_id):
            self.processor.decode_status[task_id] = "mock_decode_status"
            return "delta_text", [2, 3], "previous_texts"

        self.processor.ids2tokens = fake_ids2tokens

        # NOTE(review): defined but never attached in the original fixture;
        # kept for parity.
        def fake_messages2ids(request, **kwargs):
            return [1] if "chat_template" in kwargs else [0]

        def fake_apply_default_parameters(request):
            return request

        self.processor._apply_default_parameters = fake_apply_default_parameters

        # Reasoning parser mock posing as ErnieX1ReasoningParser.
        reasoning_parser = MagicMock()
        reasoning_parser.__class__.__name__ = "ErnieX1ReasoningParser"
        self.mock_reasoning_parser = reasoning_parser
        self.processor.reasoning_parser = reasoning_parser

        # Tool parser mock whose streaming extraction yields nothing.
        self.mock_tool_parser = MagicMock()
        self.mock_tool_parser.extract_tool_calls_streaming.return_value = None
        self.mock_tool_parser_obj = MagicMock()
        self.mock_tool_parser_obj.return_value = self.mock_tool_parser
        self.processor.tool_parser_obj = self.mock_tool_parser_obj

    def test_process_request_dict_with_options(self):
        """enable_thinking is derived from chat_template_kwargs and options.thinking_mode."""
        # (chat_template_kwargs or None, expected enable_thinking)
        cases = [
            (None, True),  # no kwargs at all: thinking defaults to enabled
            ({"enable_thinking": True}, True),
            ({"enable_thinking": False}, False),
            ({"options": {"thinking_mode": "open"}}, True),
            ({"options": {"thinking_mode": "close"}}, False),
            ({"options": {"thinking_mode": "false"}}, False),
            ({"options": {"thinking_mode": "123"}}, True),
        ]
        for kwargs, expected in cases:
            with self.subTest(chat_template_kwargs=kwargs):
                request_dict = {
                    "messages": [{"role": "user", "content": "Hello"}],
                    "prompt_token_ids": [1, 1, 1],
                }
                if kwargs is not None:
                    request_dict["chat_template_kwargs"] = kwargs
                self.processor.process_request_dict(request_dict, 100)
                self.assertEqual(request_dict["enable_thinking"], expected)


if __name__ == "__main__":
    unittest.main()