mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Feature]Optimization of Thinking Pattern Framework (#4302)
* add model status in vl
* add x1 parser
* add model_status
* fix parser
* fix parser
* fix parser
* fix parser
* Revert "fix parser"
This reverts commit 300f446d8a.
* fix parser
* fix
* fix
* fix
* fix
* fix parser
* fix unit test
* fix unit test
* add unit test
* fix
* fix
* add unit test
* fix unit test
* add unit test
* add unit test
* fix unit test
* fix unit test
* fix bug
* fix unit test
* x1 tool parser
* fix unit test
* fix unit test
* fix unit test
* fix n
* fix unit test
* add unit test
* add unit test
* remove print
This commit is contained in:
@@ -267,6 +267,18 @@ class QwenVLProcessor(TextProcessor):

        # Set default max_tokens if not specified
        if request.get("max_tokens") is None:
            request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"]))  # Ensure at least 1 token
        if self.reasoning_parser:
            model_status = self.reasoning_parser.get_model_status(request["prompt_token_ids"])
            parts = request["request_id"].split("_")
            if len(parts) > 1:
                real_req_id = parts[0]
                index = int(parts[1])
                n = request.get("n", 1)
                for idx in range(index * n, (index + 1) * n):
                    self.model_status_dict[f"{real_req_id}_{idx}"] = model_status
            else:
                self.model_status_dict[request["request_id"]] = model_status
            request["enable_thinking"] = model_status == "think_start"
        data_processor_logger.info(f"Processed request {request}")

        return request
||||
Reference in New Issue
Block a user