mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 08:37:06 +08:00)
add tool parser
@@ -85,6 +85,7 @@ class Config:
         max_long_partial_prefills: int = 1,
         long_prefill_token_threshold: int = 0,
         reasoning_parser: str = None,
+        tool_parser: str = None,
         guided_decoding_backend: Optional[str] = None,
         disable_any_whitespace: bool = False,
         enable_logprob: bool = False,
@@ -165,6 +166,7 @@ class Config:
         self.max_long_partial_prefills = max_long_partial_prefills
         self.long_prefill_token_threshold = long_prefill_token_threshold
         self.reasoning_parser = reasoning_parser
+        self.tool_parser = tool_parser
         self.graph_optimization_config = graph_optimization_config
         self.early_stop_config = early_stop_config
         self.guided_decoding_backend = guided_decoding_backend
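The first two hunks thread a new tool_parser argument through Config, stored next to the existing reasoning_parser. A minimal usage sketch, assuming the remaining constructor arguments have workable defaults; the value "hermes" is a hypothetical parser name, since the commit does not list which parsers are registered:

    # Hedged sketch: select a tool-call parser by name when building the config.
    # "hermes" is a hypothetical placeholder; valid names are not defined here.
    config = Config(
        reasoning_parser=None,
        tool_parser="hermes",  # new in this commit
    )
    assert config.tool_parser == "hermes"  # stored verbatim, per the second hunk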
@@ -236,10 +238,10 @@ class Config:
         if self.cache_config.enable_chunked_prefill:
             self.max_num_batched_tokens = 2048
         else:
-            if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+            if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                 self.max_num_batched_tokens = self.max_model_len
             else:
-                self.max_num_batched_tokens = 8192
+                self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM

         if self.long_prefill_token_threshold == 0:
             self.long_prefill_token_threshold = int(self.max_model_len * 0.04)
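The third hunk rewrites the max_num_batched_tokens fallback. A self-contained sketch of the resulting decision logic, using a helper name (resolve_max_num_batched_tokens) that is ours, not the repository's:

    import os

    def resolve_max_num_batched_tokens(enable_chunked_prefill: bool, max_model_len: int) -> int:
        # Mirrors the post-commit fallback: chunked prefill caps a batch at
        # 2048 tokens; otherwise the ENABLE_V1_KVCACHE_SCHEDULER flag picks
        # between the full model length and a fixed 8192.
        if enable_chunked_prefill:
            return 2048
        if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
            return max_model_len
        return 8192  # if set to max_model_len, it's easy to be OOM

    os.environ["ENABLE_V1_KVCACHE_SCHEDULER"] = "0"
    print(resolve_max_num_batched_tokens(False, 131072))  # -> 131072
    os.environ["ENABLE_V1_KVCACHE_SCHEDULER"] = "1"
    print(resolve_max_num_batched_tokens(False, 131072))  # -> 8192
    print(resolve_max_num_batched_tokens(True, 131072))   # -> 2048

The in-line comment carried over from the commit explains the 8192 choice: defaulting to max_model_len under the V1 scheduler made OOM too easy.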
@@ -287,7 +289,7 @@ class Config:
             )

         if not self.cache_config.enable_chunked_prefill:
-            if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+            if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                 assert self.max_num_batched_tokens >= self.max_model_len, (
                     f"max_num_batched_tokens: {self.max_num_batched_tokens} "
                     f"should be larger than or equal to max_model_len: {self.max_model_len}"
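The last hunk only normalizes quoting, but the guard it touches is worth seeing in isolation: with chunked prefill off and the V1 KV-cache scheduler disabled, the batch budget must cover a full-length prompt. A sketch with a hypothetical helper name (check_batched_tokens is ours):

    import os

    def check_batched_tokens(max_num_batched_tokens: int, max_model_len: int,
                             enable_chunked_prefill: bool) -> None:
        # Presumably, without chunked prefill a prompt must fit in a single
        # batch, hence the >= requirement. The commit keeps this check gated
        # off when ENABLE_V1_KVCACHE_SCHEDULER=1.
        if not enable_chunked_prefill:
            if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                assert max_num_batched_tokens >= max_model_len, (
                    f"max_num_batched_tokens: {max_num_batched_tokens} "
                    f"should be larger than or equal to max_model_len: {max_model_len}"
                )

    check_batched_tokens(8192, 4096, enable_chunked_prefill=False)    # passes
    # check_batched_tokens(2048, 4096, enable_chunked_prefill=False)  # AssertionError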