[Feature] add tool parser (#3518)

* [Feature] Pass through the `chat_template_kwargs` to the data processing module (#3421)

* fix chat_template_args

* fix args

* add offline

* add offline

* fix

* fix

* fix default enable_thinking value

* fix default enable_thinking value

* modify condition

* Revert "modify condition"

This reverts commit 26430bdeb1.

* fix unit test

* add Tool Parser (#3272)

* add tool-parser

* add tool-parser

* add tool parser

* add tool parser

* fix

* add offline

* add offline

* fix

* parsers: tool & reasoning

* rename the tool parser

* update

* fix reasoning-parser

* add requirements

* fix finish reason

* fix

* fix reasoning-parser

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: zhuzixuan <zhuzixuan@baidu.com>

* [Feature] add tool parser (#3483)

* add tool parser

* add x1 enable_thinking

* restart ci

* fix vl reasoning parser

* modify call style

* modify call style

* add offline enable_thinking

* fix completion

* fix

* fix unit test

* fix unit test

* fix unit test

* fix vl reasoning parser

* fix vl reasoning parser

* fix unit test

---------

Co-authored-by: zhuzixuan <zhuzixuan@baidu.com>
Author: luukunn
Date: 2025-08-22 11:14:35 +08:00
Committed by: GitHub
Parent: d97aab25bc
Commit: 4a9c04a746
31 changed files with 1289 additions and 70 deletions
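Taken together with #3421, the commits above let per-request template options
reach the data processing module. A minimal offline sketch of that passthrough,
assuming FastDeploy's LLM.chat accepts chat_template_kwargs as the commit
titles describe (model name and flag value are illustrative, not taken from
this diff):

    # Hypothetical offline usage of the chat_template_kwargs passthrough.
    from fastdeploy import LLM

    llm = LLM(model="baidu/ERNIE-4.5-0.3B-Paddle")  # placeholder model
    outputs = llm.chat(
        messages=[{"role": "user", "content": "What is 2 + 2?"}],
        # Forwarded to the chat template, e.g. to toggle thinking mode per
        # request; the enable_thinking default is also fixed in this PR.
        chat_template_kwargs={"enable_thinking": False},
    )
    print(outputs[0].outputs.text)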

@@ -141,6 +141,7 @@ class OpenAIServingChat:
         previous_num_tokens = 0
         num_prompt_tokens = 0
         num_choices = 1
+        tool_called = False
         max_streaming_response_tokens = (
             request.max_streaming_response_tokens
             if request.max_streaming_response_tokens is not None
@@ -246,20 +247,29 @@
                 output = res["outputs"]
                 delta_text = output["text"]
                 output_top_logprobs = output["top_logprobs"]
-                previous_num_tokens += len(output["token_ids"])
                 logprobs_res: Optional[LogProbs] = None
                 if request.logprobs and output_top_logprobs is not None:
                     logprobs_res = self._create_chat_logprobs(
                         output_top_logprobs, request.logprobs, request.top_logprobs
                     )
+                previous_num_tokens += len(output["token_ids"])
                 delta_message = DeltaMessage(
                     content=delta_text,
-                    reasoning_content=output.get("reasoning_content"),
+                    reasoning_content="",
                     prompt_token_ids=None,
                     completion_token_ids=None,
-                    tool_calls=output.get("tool_call_content", []),
+                    tool_calls=None,
                 )
+                if not res["finished"] and "delta_message" in output:
+                    delta_message_output = output["delta_message"]
+                    if delta_message_output is None:
+                        continue
+                    delta_message.content = delta_message_output.content or ""
+                    delta_message.reasoning_content = delta_message_output.reasoning_content or ""
+                    if delta_message_output.tool_calls:
+                        delta_message.tool_calls = delta_message_output.tool_calls
+                        tool_called = True
                 choice = ChatCompletionResponseStreamChoice(
                     index=0,
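For readers skimming the hunk above: when the tool/reasoning parser attaches a
delta_message to the output, its fields replace the raw text delta, and any
tool-call delta latches tool_called for the stream's final chunk. A standalone
sketch of that merge, assuming DeltaMessage carries the fields shown in the
diff:

    # Sketch of the streaming merge above; returns (tool_called, emit_chunk).
    def apply_parser_delta(delta_message, output, finished, tool_called):
        if finished or "delta_message" not in output:
            return tool_called, True  # no parser output: keep the raw delta
        parsed = output["delta_message"]
        if parsed is None:
            return tool_called, False  # parser held the chunk back: emit nothing
        delta_message.content = parsed.content or ""
        delta_message.reasoning_content = parsed.reasoning_content or ""
        if parsed.tool_calls:
            delta_message.tool_calls = parsed.tool_calls
            tool_called = True  # remembered for the final finish_reason
        return tool_called, True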
@@ -277,10 +287,7 @@
                     max_tokens = request.max_completion_tokens or request.max_tokens
                     if has_no_token_limit or previous_num_tokens != max_tokens:
                         choice.finish_reason = "stop"
-                        if (
-                            self.engine_client.reasoning_parser == "ernie_x1"
-                            and output.get("finish_reason", "") == "tool_calls"
-                        ):
+                        if tool_called:
                             choice.finish_reason = "tool_calls"
                     else:
                         choice.finish_reason = "length"
@@ -421,7 +428,7 @@
                 role="assistant",
                 content=output["text"],
                 reasoning_content=output.get("reasoning_content"),
-                tool_calls=output.get("tool_call_content"),
+                tool_calls=output.get("tool_call"),
                 prompt_token_ids=prompt_token_ids if request.return_token_ids else None,
                 completion_token_ids=completion_token_ids if request.return_token_ids else None,
                 text_after_process=text_after_process if request.return_token_ids else None,
@@ -443,7 +450,7 @@
             max_tokens = request.max_completion_tokens or request.max_tokens
             if has_no_token_limit or previous_num_tokens != max_tokens:
                 choice.finish_reason = "stop"
-                if self.engine_client.reasoning_parser == "ernie_x1" and output.get("finish_reason", "") == "tool_calls":
+                if output.get("tool_call"):
                     choice.finish_reason = "tool_calls"
             else:
                 choice.finish_reason = "length"