add Tool Parser (#3272)

* add tool-parser

* add tool-parser

* add tool parser

* add tool parser

* fix

* add offline

* add offline

* fix

* parsers:tool&reasoning

* rename tool parser

* update

* fix reasoning-parser

* add requirements

* fix finish reason

* fix

* fix reasoning-parser

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: zhuzixuan <zhuzixuan@baidu.com>
Author: luukunn
Date: 2025-08-13 01:06:55 +08:00 (committed by GitHub)
Parent: 2d1a4cacdf
Commit: eda83ca672
23 changed files with 1056 additions and 38 deletions
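
A tool parser, in this setting, turns raw model output into structured tool calls, with reasoning parsers doing the analogous job for chain-of-thought spans. A minimal self-contained sketch of the idea (the <tool_call> tag convention and function name here are assumptions for illustration, not this PR's implementation):

import json
import re

def parse_tool_calls(text: str) -> list:
    """Extract JSON payloads wrapped in <tool_call>...</tool_call> tags.

    The tag format is an assumed convention for illustration; real parsers
    are model-specific, which is why the config takes a string-valued
    `tool_parser` option below.
    """
    calls = []
    for payload in re.findall(r"<tool_call>(.*?)</tool_call>", text, re.DOTALL):
        try:
            calls.append(json.loads(payload))
        except json.JSONDecodeError:
            continue  # skip malformed fragments
    return calls

print(parse_tool_calls('<tool_call>{"name": "get_weather", "arguments": {"city": "Beijing"}}</tool_call>'))
# -> [{'name': 'get_weather', 'arguments': {'city': 'Beijing'}}]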


@@ -86,6 +86,7 @@ class Config:
         max_long_partial_prefills: int = 1,
         long_prefill_token_threshold: int = 0,
         reasoning_parser: str = None,
+        tool_parser: str = None,
         guided_decoding_backend: Optional[str] = None,
         disable_any_whitespace: bool = False,
         enable_logprob: bool = False,
@@ -166,6 +167,7 @@ class Config:
         self.max_long_partial_prefills = max_long_partial_prefills
         self.long_prefill_token_threshold = long_prefill_token_threshold
         self.reasoning_parser = reasoning_parser
+        self.tool_parser = tool_parser
         self.graph_optimization_config = graph_optimization_config
         self.early_stop_config = early_stop_config
         self.guided_decoding_backend = guided_decoding_backend
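
Storing the parser name as a plain string on the config suggests the usual string-keyed registry for resolving an implementation at runtime. A minimal sketch of that pattern (this is the generic idiom, not the project's actual API):

TOOL_PARSERS: dict = {}

def register_tool_parser(name: str):
    """Class decorator that registers a parser implementation under a name."""
    def wrap(cls):
        TOOL_PARSERS[name] = cls
        return cls
    return wrap

def get_tool_parser(name: str):
    """Resolve a config string such as Config(tool_parser=...) to a class."""
    try:
        return TOOL_PARSERS[name]
    except KeyError:
        raise ValueError(f"unknown tool parser: {name!r}") from None

@register_tool_parser("demo")
class DemoToolParser:
    def parse(self, text: str) -> list:
        return []  # placeholder; a real parser returns structured tool calls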
@@ -245,10 +247,10 @@ class Config:
             if self.cache_config.enable_chunked_prefill:
                 self.max_num_batched_tokens = 2048
             else:
-                if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+                if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                     self.max_num_batched_tokens = self.max_model_len
                 else:
-                    self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
+                    self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
 
         if self.long_prefill_token_threshold == 0:
             self.long_prefill_token_threshold = int(self.max_model_len * 0.04)
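
The quoting and comment-spacing changes in this hunk do not alter behavior; the underlying fallback is 2048 tokens with chunked prefill, otherwise max_model_len, unless the V1 KV cache scheduler is enabled, in which case 8192 to avoid OOM. Condensed into a standalone function for reference (the function name is mine; the values, env var, and branch order follow the diff):

import os

def default_max_num_batched_tokens(enable_chunked_prefill: bool, max_model_len: int) -> int:
    # Restates the branch above for reference.
    if enable_chunked_prefill:
        return 2048
    if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
        return max_model_len
    return 8192  # if set to max_model_len, it's easy to be OOM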
@@ -296,7 +298,7 @@ class Config:
             )
         if not self.cache_config.enable_chunked_prefill:
-            if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+            if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                 assert self.max_num_batched_tokens >= self.max_model_len, (
                     f"max_num_batched_tokens: {self.max_num_batched_tokens} "
                     f"should be larger than or equal to max_model_len: {self.max_model_len}"