add Tool Parser (#3272)

* add tool-parser

* add tool-parser

* add tool parser

* add tool parser

* fix

* add offline

* add offline

* fix

* parsers:tool&reasoning

* rename tool parser

* update

* fix reasoning-parser

* add requirements

* fix finish reason

* fix

* fix reasoning-parser

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: zhuzixuan <zhuzixuan@baidu.com>
Author: luukunn
Date: 2025-08-13 01:06:55 +08:00 (committed by GitHub)
Parent: 2d1a4cacdf
Commit: eda83ca672
23 changed files with 1056 additions and 38 deletions
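
A tool parser, in this setting, turns raw model output into structured tool calls, with reasoning parsers doing the analogous job for chain-of-thought spans. A minimal self-contained sketch of the idea (the <tool_call> tag convention and function name here are assumptions for illustration, not this PR's implementation):

import json
import re

def parse_tool_calls(text: str) -> list:
    """Extract JSON payloads wrapped in <tool_call>...</tool_call> tags.

    The tag format is an assumed convention for illustration; real parsers
    are model-specific, which is why the config takes a string-valued
    `tool_parser` option below.
    """
    calls = []
    for payload in re.findall(r"<tool_call>(.*?)</tool_call>", text, re.DOTALL):
        try:
            calls.append(json.loads(payload))
        except json.JSONDecodeError:
            continue  # skip malformed fragments
    return calls

print(parse_tool_calls('<tool_call>{"name": "get_weather", "arguments": {"city": "Beijing"}}</tool_call>'))
# -> [{'name': 'get_weather', 'arguments': {'city': 'Beijing'}}]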


@@ -86,6 +86,7 @@ class Config:
         max_long_partial_prefills: int = 1,
         long_prefill_token_threshold: int = 0,
         reasoning_parser: str = None,
+        tool_parser: str = None,
         guided_decoding_backend: Optional[str] = None,
         disable_any_whitespace: bool = False,
         enable_logprob: bool = False,
@@ -166,6 +167,7 @@ class Config:
         self.max_long_partial_prefills = max_long_partial_prefills
         self.long_prefill_token_threshold = long_prefill_token_threshold
         self.reasoning_parser = reasoning_parser
+        self.tool_parser = tool_parser
         self.graph_optimization_config = graph_optimization_config
         self.early_stop_config = early_stop_config
         self.guided_decoding_backend = guided_decoding_backend
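
Storing the parser name as a plain string on the config suggests the usual string-keyed registry for resolving an implementation at runtime. A minimal sketch of that pattern (this is the generic idiom, not the project's actual API):

TOOL_PARSERS: dict = {}

def register_tool_parser(name: str):
    """Class decorator that registers a parser implementation under a name."""
    def wrap(cls):
        TOOL_PARSERS[name] = cls
        return cls
    return wrap

def get_tool_parser(name: str):
    """Resolve a config string such as Config(tool_parser=...) to a class."""
    try:
        return TOOL_PARSERS[name]
    except KeyError:
        raise ValueError(f"unknown tool parser: {name!r}") from None

@register_tool_parser("demo")
class DemoToolParser:
    def parse(self, text: str) -> list:
        return []  # placeholder; a real parser returns structured tool calls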
@@ -245,10 +247,10 @@ class Config:
             if self.cache_config.enable_chunked_prefill:
                 self.max_num_batched_tokens = 2048
             else:
-                if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+                if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                     self.max_num_batched_tokens = self.max_model_len
                 else:
-                    self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
+                    self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
 
         if self.long_prefill_token_threshold == 0:
             self.long_prefill_token_threshold = int(self.max_model_len * 0.04)
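
The quoting and comment-spacing changes in this hunk do not alter behavior; the underlying fallback is 2048 tokens with chunked prefill, otherwise max_model_len, unless the V1 KV cache scheduler is enabled, in which case 8192 to avoid OOM. Condensed into a standalone function for reference (the function name is mine; the values, env var, and branch order follow the diff):

import os

def default_max_num_batched_tokens(enable_chunked_prefill: bool, max_model_len: int) -> int:
    # Restates the branch above for reference.
    if enable_chunked_prefill:
        return 2048
    if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
        return max_model_len
    return 8192  # if set to max_model_len, it's easy to be OOM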
@@ -296,7 +298,7 @@ class Config:
             )
         if not self.cache_config.enable_chunked_prefill:
-            if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+            if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                 assert self.max_num_batched_tokens >= self.max_model_len, (
                     f"max_num_batched_tokens: {self.max_num_batched_tokens} "
                     f"should be larger than or equal to max_model_len: {self.max_model_len}"