From b272ca9f839aa262b9f6cd11f3cf7d31bf8dc1b3 Mon Sep 17 00:00:00 2001 From: chenjian <1435317881@qq.com> Date: Thu, 25 Sep 2025 19:08:38 +0800 Subject: [PATCH] [Bug fix] Fix bug for supporting max think len (#4267) * fix bug for supporting max thinking lens * fix max_think_lens --- fastdeploy/engine/request.py | 6 +++++- fastdeploy/engine/sched/resource_manager_v1.py | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py index 3906cd29b..fbe937f2a 100644 --- a/fastdeploy/engine/request.py +++ b/fastdeploy/engine/request.py @@ -72,6 +72,7 @@ class Request: structural_tag: Optional[Any] = None, guided_json_object: Optional[bool] = None, enable_thinking: Optional[bool] = True, + reasoning_max_tokens: Optional[int] = None, trace_carrier: dict = dict(), dp_rank: Optional[int] = None, chat_template: Optional[str] = None, @@ -121,6 +122,7 @@ class Request: self.multimodal_img_boundaries = None self.enable_thinking = enable_thinking + self.reasoning_max_tokens = reasoning_max_tokens self.trace_carrier = trace_carrier self.chat_template = chat_template @@ -178,7 +180,8 @@ class Request: guided_grammar=d.get("guided_grammar", None), structural_tag=d.get("structural_tag", None), guided_json_object=d.get("guided_json_object", None), - enable_thinking=d.get("enable_thinking", True), + enable_thinking=d.get("enable_thinking", False), + reasoning_max_tokens=d.get("reasoning_max_tokens", None), trace_carrier=d.get("trace_carrier", {}), chat_template=d.get("chat_template", None), num_computed_tokens=d.get("num_computed_tokens", 0), @@ -229,6 +232,7 @@ class Request: "disaggregate_info": self.disaggregate_info, "draft_token_ids": self.draft_token_ids, "enable_thinking": self.enable_thinking, + "reasoning_max_tokens": self.reasoning_max_tokens, "trace_carrier": self.trace_carrier, "chat_template": self.chat_template, "num_computed_tokens": self.num_computed_tokens, diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index 0628e63cf..6291c8f3a 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -672,6 +672,8 @@ class ResourceManagerV1(ResourceManager): return False if self.available_batch() == 0: return False + if request.reasoning_max_tokens is not None: + request.reasoning_max_tokens -= 1 request.need_prefill_tokens = len(request.prompt_token_ids) need_prealloc_prefill_blocks = ( request.need_prefill_tokens + self.config.cache_config.block_size - 1