mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 08:37:06 +08:00)
add tool parser
@@ -85,6 +85,7 @@ class Config:
         max_long_partial_prefills: int = 1,
         long_prefill_token_threshold: int = 0,
         reasoning_parser: str = None,
+        tool_parser: str = None,
         guided_decoding_backend: Optional[str] = None,
         disable_any_whitespace: bool = False,
         enable_logprob: bool = False,
@@ -165,6 +166,7 @@ class Config:
         self.max_long_partial_prefills = max_long_partial_prefills
         self.long_prefill_token_threshold = long_prefill_token_threshold
         self.reasoning_parser = reasoning_parser
+        self.tool_parser = tool_parser
         self.graph_optimization_config = graph_optimization_config
         self.early_stop_config = early_stop_config
         self.guided_decoding_backend = guided_decoding_backend
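The first two hunks thread a new tool_parser argument through Config, stored next to the existing reasoning_parser. A minimal usage sketch, assuming the remaining constructor arguments have workable defaults; the value "hermes" is a hypothetical parser name, since the commit does not list which parsers are registered:

    # Hedged sketch: select a tool-call parser by name when building the config.
    # "hermes" is a hypothetical placeholder; valid names are not defined here.
    config = Config(
        reasoning_parser=None,
        tool_parser="hermes",  # new in this commit
    )
    assert config.tool_parser == "hermes"  # stored verbatim, per the second hunk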
@@ -236,10 +238,10 @@ class Config:
         if self.cache_config.enable_chunked_prefill:
             self.max_num_batched_tokens = 2048
         else:
-            if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+            if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                 self.max_num_batched_tokens = self.max_model_len
             else:
-                self.max_num_batched_tokens = 8192
+                self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM

         if self.long_prefill_token_threshold == 0:
             self.long_prefill_token_threshold = int(self.max_model_len * 0.04)
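The third hunk rewrites the max_num_batched_tokens fallback. A self-contained sketch of the resulting decision logic, using a helper name (resolve_max_num_batched_tokens) that is ours, not the repository's:

    import os

    def resolve_max_num_batched_tokens(enable_chunked_prefill: bool, max_model_len: int) -> int:
        # Mirrors the post-commit fallback: chunked prefill caps a batch at
        # 2048 tokens; otherwise the ENABLE_V1_KVCACHE_SCHEDULER flag picks
        # between the full model length and a fixed 8192.
        if enable_chunked_prefill:
            return 2048
        if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
            return max_model_len
        return 8192  # if set to max_model_len, it's easy to be OOM

    os.environ["ENABLE_V1_KVCACHE_SCHEDULER"] = "0"
    print(resolve_max_num_batched_tokens(False, 131072))  # -> 131072
    os.environ["ENABLE_V1_KVCACHE_SCHEDULER"] = "1"
    print(resolve_max_num_batched_tokens(False, 131072))  # -> 8192
    print(resolve_max_num_batched_tokens(True, 131072))   # -> 2048

The in-line comment carried over from the commit explains the 8192 choice: defaulting to max_model_len under the V1 scheduler made OOM too easy.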
@@ -287,7 +289,7 @@ class Config:
             )

         if not self.cache_config.enable_chunked_prefill:
-            if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+            if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                 assert self.max_num_batched_tokens >= self.max_model_len, (
                     f"max_num_batched_tokens: {self.max_num_batched_tokens} "
                     f"should be larger than or equal to max_model_len: {self.max_model_len}"
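The last hunk only normalizes quoting, but the guard it touches is worth seeing in isolation: with chunked prefill off and the V1 KV-cache scheduler disabled, the batch budget must cover a full-length prompt. A sketch with a hypothetical helper name (check_batched_tokens is ours):

    import os

    def check_batched_tokens(max_num_batched_tokens: int, max_model_len: int,
                             enable_chunked_prefill: bool) -> None:
        # Presumably, without chunked prefill a prompt must fit in a single
        # batch, hence the >= requirement. The commit keeps this check gated
        # off when ENABLE_V1_KVCACHE_SCHEDULER=1.
        if not enable_chunked_prefill:
            if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                assert max_num_batched_tokens >= max_model_len, (
                    f"max_num_batched_tokens: {max_num_batched_tokens} "
                    f"should be larger than or equal to max_model_len: {max_model_len}"
                )

    check_batched_tokens(8192, 4096, enable_chunked_prefill=False)    # passes
    # check_batched_tokens(2048, 4096, enable_chunked_prefill=False)  # AssertionError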