add Tool Parser (#3272)

* add tool-parser

* add tool-parser

* add tool parser

* add tool parser

* fix

* add offline

* add offline

* fix

* parsers:tool&reasoning

* rename tool parser

* update

* fix reasoning-parser

* add requirements

* fix finish reason

* fix

* fix reasoning-parser

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: zhuzixuan <zhuzixuan@baidu.com>
Authored by luukunn on 2025-08-13 01:06:55 +08:00; committed by GitHub
parent 2d1a4cacdf
commit eda83ca672
23 changed files with 1056 additions and 38 deletions

View File

@@ -15,10 +15,10 @@
 """
 import json
+import os
 from dataclasses import asdict, dataclass
 from dataclasses import fields as dataclass_fields
 from typing import Any, Dict, List, Optional
-import os
 
 from fastdeploy.config import (
     CacheConfig,
@@ -94,6 +94,14 @@ class EngineArgs:
     """
     specifies the reasoning parser to use for extracting reasoning content from the model output
     """
+    tool_call_parser: str = None
+    """
+    specifies the tool call parser to use for extracting tool call from the model output
+    """
+    tool_parser_plugin: str = None
+    """
+    tool parser plugin used to register user defined tool parsers
+    """
     enable_mm: bool = False
     """
     Flags to enable multi-modal model
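
For orientation, a minimal sketch of how the two new fields sit next to the existing reasoning_parser. Only the field names come from the diff; the import path, the model path, and the parser/plugin values are assumptions, not values confirmed by this commit.

    from fastdeploy.engine.args_utils import EngineArgs  # assumed module path

    args = EngineArgs(
        model="/path/to/model",              # placeholder
        reasoning_parser="ernie-45",         # hypothetical parser name
        tool_call_parser="ernie-45",         # hypothetical parser name
        tool_parser_plugin="my_parsers.py",  # hypothetical plugin file
    )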
@@ -434,6 +442,18 @@ class EngineArgs:
             help="Flag specifies the reasoning parser to use for extracting "
             "reasoning content from the model output",
         )
+        model_group.add_argument(
+            "--tool-call-parser",
+            type=str,
+            default=EngineArgs.tool_call_parser,
+            help="Flag specifies the tool call parser to use for extracting " "tool call from the model output",
+        )
+        model_group.add_argument(
+            "--tool-parser-plugin",
+            type=str,
+            default=EngineArgs.tool_parser_plugin,
+            help="tool parser plugin used to register user defined tool parsers",
+        )
         model_group.add_argument(
             "--speculative-config",
             type=json.loads,
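
Since the hunk above only shows the flag wiring, here is a small self-contained sketch of how argparse maps these hyphenated flags onto the underscored attribute names used elsewhere in the diff; the "ernie-45" value is a placeholder, not a parser name confirmed by this commit.

    import argparse

    # Standalone mirror of the two flags added above.
    parser = argparse.ArgumentParser()
    model_group = parser.add_argument_group("Model Configuration")
    model_group.add_argument("--tool-call-parser", type=str, default=None)
    model_group.add_argument("--tool-parser-plugin", type=str, default=None)

    ns = parser.parse_args(["--tool-call-parser", "ernie-45"])
    print(ns.tool_call_parser)    # "ernie-45" -- hyphens become underscores
    print(ns.tool_parser_plugin)  # None -- falls back to the default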
@@ -885,10 +905,10 @@ class EngineArgs:
             if self.enable_chunked_prefill:
                 self.max_num_batched_tokens = 2048
             else:
-                if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+                if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                     self.max_num_batched_tokens = self.max_model_len
                 else:
-                    self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
+                    self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
 
         all_dict = asdict(self)
         all_dict["model_cfg"] = model_cfg
@@ -927,6 +947,7 @@
             mm_processor_kwargs=self.mm_processor_kwargs,
             # enable_mm=self.enable_mm,
             reasoning_parser=self.reasoning_parser,
+            tool_parser=self.tool_call_parser,
             splitwise_role=self.splitwise_role,
             innode_prefill_ports=self.innode_prefill_ports,
             max_num_partial_prefills=self.max_num_partial_prefills,

View File

@@ -86,6 +86,7 @@ class Config:
         max_long_partial_prefills: int = 1,
         long_prefill_token_threshold: int = 0,
         reasoning_parser: str = None,
+        tool_parser: str = None,
         guided_decoding_backend: Optional[str] = None,
         disable_any_whitespace: bool = False,
         enable_logprob: bool = False,
@@ -166,6 +167,7 @@ class Config:
         self.max_long_partial_prefills = max_long_partial_prefills
         self.long_prefill_token_threshold = long_prefill_token_threshold
         self.reasoning_parser = reasoning_parser
+        self.tool_parser = tool_parser
         self.graph_optimization_config = graph_optimization_config
         self.early_stop_config = early_stop_config
         self.guided_decoding_backend = guided_decoding_backend
@@ -245,10 +247,10 @@ class Config:
         if self.cache_config.enable_chunked_prefill:
             self.max_num_batched_tokens = 2048
         else:
-            if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+            if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                 self.max_num_batched_tokens = self.max_model_len
             else:
-                self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
+                self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
 
         if self.long_prefill_token_threshold == 0:
             self.long_prefill_token_threshold = int(self.max_model_len * 0.04)
@@ -296,7 +298,7 @@
         )
         if not self.cache_config.enable_chunked_prefill:
-            if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+            if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                 assert self.max_num_batched_tokens >= self.max_model_len, (
                     f"max_num_batched_tokens: {self.max_num_batched_tokens} "
                     f"should be larger than or equal to max_model_len: {self.max_model_len}"

View File

@@ -106,6 +106,7 @@ class LLMEngine:
             cfg.limit_mm_per_prompt,
             cfg.mm_processor_kwargs,
             cfg.enable_mm,
+            cfg.tool_parser,
         )
 
         self.start_queue_service()

View File

@@ -24,6 +24,7 @@ from typing import Any, Dict, Optional, Union
 import numpy as np
 
 from fastdeploy.engine.sampling_params import SamplingParams
+from fastdeploy.entrypoints.openai.protocol import ToolCall
 from fastdeploy.utils import data_processor_logger
 from fastdeploy.worker.output import LogprobsLists, SampleLogprobs

@@ -249,6 +250,7 @@ class CompletionOutput:
     draft_token_ids: list[int] = None
     text: Optional[str] = None
     reasoning_content: Optional[str] = None
+    tool_calls: Optional[ToolCall] = None
 
     def to_dict(self):
         """