diff --git a/fastdeploy/entrypoints/openai/serving_chat.py b/fastdeploy/entrypoints/openai/serving_chat.py index 186e1a0f5..16f5f78a0 100644 --- a/fastdeploy/entrypoints/openai/serving_chat.py +++ b/fastdeploy/entrypoints/openai/serving_chat.py @@ -325,7 +325,9 @@ class OpenAIServingChat: continue delta_message.content = delta_message_output.content or "" delta_message.reasoning_content = delta_message_output.reasoning_content or "" - delta_message.tool_calls = delta_message_output.tool_calls + if delta_message_output.tool_calls: + delta_message.tool_calls = delta_message_output.tool_calls + tool_called = True choice = ChatCompletionResponseStreamChoice( index=0, diff --git a/fastdeploy/entrypoints/openai/serving_completion.py b/fastdeploy/entrypoints/openai/serving_completion.py index 75812d58c..aa5d5f3c5 100644 --- a/fastdeploy/entrypoints/openai/serving_completion.py +++ b/fastdeploy/entrypoints/openai/serving_completion.py @@ -418,7 +418,9 @@ class OpenAIServingCompletion: continue delta_message.text = delta_message_output.content or "" delta_message.reasoning_content = delta_message_output.reasoning_content or "" - delta_message.tool_calls = delta_message_output.tool_calls + if delta_message_output.tool_calls: + delta_message.tool_calls = delta_message_output.tool_calls + tool_called[idx] = True choices.append(delta_message) diff --git a/fastdeploy/input/preprocess.py b/fastdeploy/input/preprocess.py index e7d1c1a9e..48626f380 100644 --- a/fastdeploy/input/preprocess.py +++ b/fastdeploy/input/preprocess.py @@ -71,8 +71,15 @@ class InputPreprocessor: """ reasoning_parser_obj = None tool_parser_obj = None - if self.reasoning_parser: - reasoning_parser_obj = ReasoningParserManager.get_reasoning_parser(self.reasoning_parser) + try: + from fastdeploy.plugins.reasoning_parser import ( + load_reasoning_parser_plugins, + ) + + reasoning_parser_obj = load_reasoning_parser_plugins() + except: + if self.reasoning_parser: + reasoning_parser_obj = 
ReasoningParserManager.get_reasoning_parser(self.reasoning_parser) if self.tool_parser: tool_parser_obj = ToolParserManager.get_tool_parser(self.tool_parser) @@ -85,6 +92,8 @@ class InputPreprocessor: Processor = load_input_processor_plugins() self.processor = Processor( model_name_or_path=self.model_name_or_path, + reasoning_parser_obj=reasoning_parser_obj, + tool_parser_obj=tool_parser_obj, ) except: if not self.enable_mm: diff --git a/fastdeploy/plugins/__init__.py b/fastdeploy/plugins/__init__.py index 6df06f763..5972f1b4a 100644 --- a/fastdeploy/plugins/__init__.py +++ b/fastdeploy/plugins/__init__.py @@ -17,5 +17,11 @@ from .input_processor import load_input_processor_plugins from .model_register import load_model_register_plugins from .model_runner import load_model_runner_plugins +from .reasoning_parser import load_reasoning_parser_plugins -__all__ = ["load_model_register_plugins", "load_model_runner_plugins", "load_input_processor_plugins"] +__all__ = [ + "load_model_register_plugins", + "load_model_runner_plugins", + "load_input_processor_plugins", + "load_reasoning_parser_plugins", +] diff --git a/fastdeploy/plugins/reasoning_parser/__init__.py b/fastdeploy/plugins/reasoning_parser/__init__.py new file mode 100644 index 000000000..bb19e0e70 --- /dev/null +++ b/fastdeploy/plugins/reasoning_parser/__init__.py @@ -0,0 +1,27 @@ +""" +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+"""
+
+from fastdeploy.plugins.utils import load_plugins_by_group
+
+PLUGINS_GROUP = "fastdeploy.reasoning_parser_plugins"  # group name handed to load_plugins_by_group
+
+
+def load_reasoning_parser_plugins():
+    """Call the one registered reasoning parser plugin and return its result; raise RuntimeError otherwise."""
+    plugins = load_plugins_by_group(group=PLUGINS_GROUP)
+    if len(plugins) != 1:  # assert is stripped under -O; next() on an empty mapping raises StopIteration
+        raise RuntimeError(f"Exactly one reasoning parser plugin is allowed, found {len(plugins)}.")
+    return next(iter(plugins.values()))()