Revert "Merge branch 'feature/online/vs_think_20250813' into release/2.1"

This reverts commit 02596fc537, reversing
changes made to 03347626a6.
This commit is contained in:
Jiang-Jia-Jun
2025-08-14 17:20:29 +08:00
parent 02596fc537
commit 28918702c2
23 changed files with 39 additions and 1054 deletions

View File

@@ -15,10 +15,10 @@
"""
import json
import os
from dataclasses import asdict, dataclass
from dataclasses import fields as dataclass_fields
from typing import Any, Dict, List, Optional
import os
from fastdeploy.config import (
CacheConfig,
@@ -93,14 +93,6 @@ class EngineArgs:
"""
specifies the reasoning parser to use for extracting reasoning content from the model output
"""
tool_call_parser: str = None
"""
specifies the tool call parser to use for extracting tool call from the model output
"""
tool_parser_plugin: str = None
"""
tool parser plugin used to register user defined tool parsers
"""
enable_mm: bool = False
"""
Flags to enable multi-modal model
@@ -429,18 +421,6 @@ class EngineArgs:
help="Flag specifies the reasoning parser to use for extracting "
"reasoning content from the model output",
)
model_group.add_argument(
"--tool-call-parser",
type=str,
default=EngineArgs.tool_call_parser,
help="Flag specifies the tool call parser to use for extracting" "tool call from the model output",
)
model_group.add_argument(
"--tool-parser-plugin",
type=str,
default=EngineArgs.tool_parser_plugin,
help="tool parser plugin used to register user defined tool parsers",
)
model_group.add_argument(
"--speculative-config",
type=json.loads,
@@ -886,10 +866,10 @@ class EngineArgs:
if self.enable_chunked_prefill:
self.max_num_batched_tokens = 2048
else:
if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
self.max_num_batched_tokens = self.max_model_len
else:
self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
self.max_num_batched_tokens = 8192
all_dict = asdict(self)
all_dict["model_cfg"] = model_cfg
@@ -928,7 +908,6 @@ class EngineArgs:
mm_processor_kwargs=self.mm_processor_kwargs,
enable_mm=self.enable_mm,
reasoning_parser=self.reasoning_parser,
tool_parser=self.tool_call_parser,
splitwise_role=self.splitwise_role,
innode_prefill_ports=self.innode_prefill_ports,
max_num_partial_prefills=self.max_num_partial_prefills,