mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
【DataProcessor】add options thinking_mode (#4735)
* add thinking_mode * add thinking_mode * add thinking_mode * add thinking_mode * add thinking_mode * add thinking_mode * add unit test
This commit is contained in:
@@ -200,9 +200,7 @@ class OpenAIServingChat:
|
||||
|
||||
max_streaming_response_tokens = max(1, max_streaming_response_tokens)
|
||||
|
||||
enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
|
||||
if enable_thinking is None:
|
||||
enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
|
||||
enable_thinking = self._get_thinking_status(request)
|
||||
|
||||
include_stop_str_in_output = request.include_stop_str_in_output
|
||||
|
||||
@@ -461,9 +459,7 @@ class OpenAIServingChat:
|
||||
"""
|
||||
created_time = int(time.time())
|
||||
num_choices = 1 if request.n is None else request.n
|
||||
enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
|
||||
if enable_thinking is None:
|
||||
enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
|
||||
enable_thinking = self._get_thinking_status(request)
|
||||
|
||||
include_stop_str_in_output = request.include_stop_str_in_output
|
||||
try:
|
||||
@@ -750,3 +746,20 @@ class OpenAIServingChat:
|
||||
error_msg = f"Error in _build_logprobs_response: {e}, {str(traceback.format_exc())}"
|
||||
api_server_logger.error(error_msg)
|
||||
return None
|
||||
|
||||
def _get_thinking_status(self, request: ChatCompletionRequest) -> bool:
|
||||
"""
|
||||
Get the thinking status from the request.
|
||||
"""
|
||||
enable_thinking = request.chat_template_kwargs.get("enable_thinking") if request.chat_template_kwargs else None
|
||||
if enable_thinking is None:
|
||||
enable_thinking = request.metadata.get("enable_thinking") if request.metadata else None
|
||||
options = request.chat_template_kwargs.get("options") if request.chat_template_kwargs else None
|
||||
if options:
|
||||
thinking_mode = options.get("thinking_mode")
|
||||
if thinking_mode:
|
||||
if thinking_mode == "close" or thinking_mode == "false":
|
||||
enable_thinking = False
|
||||
else:
|
||||
enable_thinking = True
|
||||
return enable_thinking
|
||||
|
||||
@@ -237,6 +237,14 @@ class Ernie4_5_VLProcessor(Ernie4_5Processor):
|
||||
request[k] = v
|
||||
else:
|
||||
raise ValueError("Invalid input: chat_template_kwargs must be a dict")
|
||||
options = chat_template_kwargs.get("options")
|
||||
if options:
|
||||
thinking_mode = options.get("thinking_mode")
|
||||
if thinking_mode:
|
||||
if thinking_mode == "close" or thinking_mode == "false":
|
||||
request["enable_thinking"] = False
|
||||
else:
|
||||
request["enable_thinking"] = True
|
||||
request.setdefault("enable_thinking", True)
|
||||
outputs = self.ernie4_5_processor.request2ids(request)
|
||||
else:
|
||||
|
||||
@@ -147,6 +147,7 @@ class DataProcessor:
|
||||
"user": "User: ",
|
||||
"bot": "Assistant: ",
|
||||
"assistant": "Assistant: ",
|
||||
"tool": "Tool: ",
|
||||
}
|
||||
|
||||
def _build_token_type_mapping(self) -> Dict[Any, int]:
|
||||
|
||||
71
tests/entrypoints/openai/test_serving_chat.py
Normal file
71
tests/entrypoints/openai/test_serving_chat.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
import unittest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest
|
||||
from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
|
||||
|
||||
|
||||
class TestOpenAIServingCompletion(unittest.TestCase):
    """Tests for OpenAIServingChat._get_thinking_status.

    NOTE(review): the class name says "Completion" but it exercises the chat
    handler; kept as-is so external test selection by name still works —
    consider renaming to TestOpenAIServingChat.
    """

    def setUp(self):
        """Create an OpenAIServingChat instance backed by a mocked engine."""
        self.mock_engine = MagicMock()
        self.chat_completion_handler = OpenAIServingChat(
            self.mock_engine,
            models=None,
            pid=123,
            ips=None,
            max_waiting_time=10,
            chat_template=None,
        )

    def test_enable_thinking(self):
        """_get_thinking_status honours enable_thinking and options.thinking_mode."""
        # (chat_template_kwargs, expected enable_thinking)
        cases = [
            ({}, None),  # nothing specified -> no preference
            ({"enable_thinking": True}, True),
            ({"enable_thinking": False}, False),
            ({"options": {"thinking_mode": "close"}}, False),
            ({"options": {"thinking_mode": "false"}}, False),
            ({"options": {"thinking_mode": "open"}}, True),
            ({"options": {"thinking_mode": "123"}}, True),  # unknown mode -> enabled
        ]
        for kwargs, expected in cases:
            with self.subTest(chat_template_kwargs=kwargs):
                request = ChatCompletionRequest(messages=[], chat_template_kwargs=kwargs)
                enable_thinking = self.chat_completion_handler._get_thinking_status(request)
                self.assertEqual(enable_thinking, expected)
|
||||
|
||||
|
||||
# Allow running this test module directly: `python test_serving_chat.py`.
if __name__ == "__main__":
    unittest.main()
|
||||
116
tests/input/test_ernie_vl_processor.py
Normal file
116
tests/input/test_ernie_vl_processor.py
Normal file
@@ -0,0 +1,116 @@
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from fastdeploy.input.ernie4_5_vl_processor import Ernie4_5_VLProcessor
|
||||
|
||||
|
||||
class TestErnie4_5_vl_ProcessorProcessResponseDictStreaming(unittest.TestCase):
    """Tests for Ernie4_5_VLProcessor.process_request_dict thinking-mode handling."""

    def setUp(self):
        """Build an Ernie4_5_VLProcessor with its heavy dependencies mocked out."""
        # Skip the real __init__ (it loads tokenizer/model resources from disk).
        with patch.object(Ernie4_5_VLProcessor, "__init__", return_value=None):
            self.processor = Ernie4_5_VLProcessor("model_path")

        # Minimal attribute surface process_request_dict touches.
        self.processor.tokenizer = MagicMock()
        self.processor.tokenizer.eos_token_id = 1
        self.processor.decode_status = {}
        self.processor.reasoning_end_dict = {}
        self.processor.tool_parser_dict = {}
        self.processor.generation_config = MagicMock()
        self.processor.eos_token_ids = [1]
        self.processor._check_mm_limits = MagicMock()
        self.processor.ernie4_5_processor = MagicMock()
        self.processor.pack_outputs = MagicMock()

        # ids2tokens: record decode status and return canned streaming output.
        def mock_ids2tokens(token_ids, task_id):
            self.processor.decode_status[task_id] = "mock_decode_status"
            return "delta_text", [2, 3], "previous_texts"

        self.processor.ids2tokens = mock_ids2tokens

        # Defaults application is a pass-through for these tests.
        self.processor._apply_default_parameters = lambda request: request

        # Reasoning parser mock (class name is inspected by the processor).
        self.mock_reasoning_parser = MagicMock()
        self.mock_reasoning_parser.__class__.__name__ = "ErnieX1ReasoningParser"
        self.processor.reasoning_parser = self.mock_reasoning_parser

        # Tool parser mock.
        self.mock_tool_parser = MagicMock()
        self.mock_tool_parser.extract_tool_calls_streaming.return_value = None
        self.mock_tool_parser_obj = MagicMock()
        self.mock_tool_parser_obj.return_value = self.mock_tool_parser
        self.processor.tool_parser_obj = self.mock_tool_parser_obj

    def test_process_request_dict_with_options(self):
        """enable_thinking is derived from chat_template_kwargs / options.thinking_mode."""
        # (chat_template_kwargs or None, expected request["enable_thinking"])
        cases = [
            (None, True),  # no kwargs -> thinking enabled by default
            ({"enable_thinking": True}, True),
            ({"enable_thinking": False}, False),
            ({"options": {"thinking_mode": "open"}}, True),
            ({"options": {"thinking_mode": "close"}}, False),
            ({"options": {"thinking_mode": "false"}}, False),
            ({"options": {"thinking_mode": "123"}}, True),  # unknown mode -> enabled
        ]
        for kwargs, expected in cases:
            with self.subTest(chat_template_kwargs=kwargs):
                request_dict = {
                    "messages": [{"role": "user", "content": "Hello"}],
                    "prompt_token_ids": [1, 1, 1],
                }
                if kwargs is not None:
                    request_dict["chat_template_kwargs"] = kwargs
                self.processor.process_request_dict(request_dict, 100)
                self.assertEqual(request_dict["enable_thinking"], expected)
|
||||
|
||||
|
||||
# Allow running this test module directly: `python test_ernie_vl_processor.py`.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user