Revert "Merge branch 'feature/online/vs_think_20250813' into release/2.1"

This reverts commit 02596fc537, reversing changes made to 03347626a6.
2025-10-05 16:48:03 +08:00 · 2025-08-14 17:20:29 +08:00
parent 02596fc537
commit 28918702c2
23 changed files with 39 additions and 1054 deletions
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -15,10 +15,10 @@
 """

 import json
-import os
 from dataclasses import asdict, dataclass
 from dataclasses import fields as dataclass_fields
 from typing import Any, Dict, List, Optional
+import os

 from fastdeploy.config import (
    CacheConfig,
@@ -93,14 +93,6 @@ class EngineArgs:
    """
    specifies the reasoning parser to use for extracting reasoning content from the model output
    """
-    tool_call_parser: str = None
-    """
-    specifies the tool call parser  to use for extracting tool call from the model output
-    """
-    tool_parser_plugin: str = None
-    """
-    tool parser plugin used to register user defined tool parsers
-    """
    enable_mm: bool = False
    """
    Flags to enable multi-modal model
@@ -429,18 +421,6 @@ class EngineArgs:
            help="Flag specifies the reasoning parser to use for extracting "
            "reasoning content from the model output",
        )
-        model_group.add_argument(
-            "--tool-call-parser",
-            type=str,
-            default=EngineArgs.tool_call_parser,
-            help="Flag specifies the tool call parser to use for extracting" "tool call from the model output",
-        )
-        model_group.add_argument(
-            "--tool-parser-plugin",
-            type=str,
-            default=EngineArgs.tool_parser_plugin,
-            help="tool parser plugin used to register user defined tool parsers",
-        )
        model_group.add_argument(
            "--speculative-config",
            type=json.loads,
@@ -886,10 +866,10 @@ class EngineArgs:
            if self.enable_chunked_prefill:
                self.max_num_batched_tokens = 2048
            else:
-                if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
+                if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
                    self.max_num_batched_tokens = self.max_model_len
                else:
-                    self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
+                    self.max_num_batched_tokens = 8192

        all_dict = asdict(self)
        all_dict["model_cfg"] = model_cfg
@@ -928,7 +908,6 @@ class EngineArgs:
            mm_processor_kwargs=self.mm_processor_kwargs,
            enable_mm=self.enable_mm,
            reasoning_parser=self.reasoning_parser,
-            tool_parser=self.tool_call_parser,
            splitwise_role=self.splitwise_role,
            innode_prefill_ports=self.innode_prefill_ports,
            max_num_partial_prefills=self.max_num_partial_prefills,