[Bug fix] Fix bug for supporting max think len (#4267)

* fix bug for supporting max thinking length

* fix max_think_lens
This commit is contained in:
chenjian
2025-09-25 19:08:38 +08:00
committed by GitHub
parent db653644ad
commit b272ca9f83
2 changed files with 7 additions and 1 deletions

View File

@@ -72,6 +72,7 @@ class Request:
structural_tag: Optional[Any] = None, structural_tag: Optional[Any] = None,
guided_json_object: Optional[bool] = None, guided_json_object: Optional[bool] = None,
enable_thinking: Optional[bool] = True, enable_thinking: Optional[bool] = True,
reasoning_max_tokens: Optional[int] = None,
trace_carrier: dict = dict(), trace_carrier: dict = dict(),
dp_rank: Optional[int] = None, dp_rank: Optional[int] = None,
chat_template: Optional[str] = None, chat_template: Optional[str] = None,
@@ -121,6 +122,7 @@ class Request:
self.multimodal_img_boundaries = None self.multimodal_img_boundaries = None
self.enable_thinking = enable_thinking self.enable_thinking = enable_thinking
self.reasoning_max_tokens = reasoning_max_tokens
self.trace_carrier = trace_carrier self.trace_carrier = trace_carrier
self.chat_template = chat_template self.chat_template = chat_template
@@ -178,7 +180,8 @@ class Request:
guided_grammar=d.get("guided_grammar", None), guided_grammar=d.get("guided_grammar", None),
structural_tag=d.get("structural_tag", None), structural_tag=d.get("structural_tag", None),
guided_json_object=d.get("guided_json_object", None), guided_json_object=d.get("guided_json_object", None),
enable_thinking=d.get("enable_thinking", True), enable_thinking=d.get("enable_thinking", False),
reasoning_max_tokens=d.get("reasoning_max_tokens", None),
trace_carrier=d.get("trace_carrier", {}), trace_carrier=d.get("trace_carrier", {}),
chat_template=d.get("chat_template", None), chat_template=d.get("chat_template", None),
num_computed_tokens=d.get("num_computed_tokens", 0), num_computed_tokens=d.get("num_computed_tokens", 0),
@@ -229,6 +232,7 @@ class Request:
"disaggregate_info": self.disaggregate_info, "disaggregate_info": self.disaggregate_info,
"draft_token_ids": self.draft_token_ids, "draft_token_ids": self.draft_token_ids,
"enable_thinking": self.enable_thinking, "enable_thinking": self.enable_thinking,
"reasoning_max_tokens": self.reasoning_max_tokens,
"trace_carrier": self.trace_carrier, "trace_carrier": self.trace_carrier,
"chat_template": self.chat_template, "chat_template": self.chat_template,
"num_computed_tokens": self.num_computed_tokens, "num_computed_tokens": self.num_computed_tokens,

View File

@@ -672,6 +672,8 @@ class ResourceManagerV1(ResourceManager):
return False return False
if self.available_batch() == 0: if self.available_batch() == 0:
return False return False
if request.reasoning_max_tokens is not None:
request.reasoning_max_tokens -= 1
request.need_prefill_tokens = len(request.prompt_token_ids) request.need_prefill_tokens = len(request.prompt_token_ids)
need_prealloc_prefill_blocks = ( need_prealloc_prefill_blocks = (
request.need_prefill_tokens + self.config.cache_config.block_size - 1 request.need_prefill_tokens + self.config.cache_config.block_size - 1