mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Feature]Optimization of Thinking Pattern Framework (#4302)
* add model status in vl
* add x1 parser
* add model_status
* fix parser
* fix parser
* fix parser
* fix parser
* Revert "fix parser"
This reverts commit 300f446d8a.
* fix parser
* fix
* fix
* fix
* fix
* fix parser
* fix unit test
* fix unit test
* add unit test
* fix
* fix
* add unit test
* fix unit test
* add unit test
* add unit test
* fix unit test
* fix unit test
* fix bug
* fix unit test
* x1 tool parser
* fix unit test
* fix unit test
* fix unit test
* fix n
* fix unit test
* add unit test
* add unit test
* remove print
This commit is contained in:
@@ -267,6 +267,18 @@ class QwenVLProcessor(TextProcessor):

        # Set default max_tokens if not specified
        if request.get("max_tokens") is None:
            request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"]))  # Ensure at least 1 token
        if self.reasoning_parser:
            model_status = self.reasoning_parser.get_model_status(request["prompt_token_ids"])
            parts = request["request_id"].split("_")
            if len(parts) > 1:
                real_req_id = parts[0]
                index = int(parts[1])
                n = request.get("n", 1)
                for idx in range(index * n, (index + 1) * n):
                    self.model_status_dict[f"{real_req_id}_{idx}"] = model_status
            else:
                self.model_status_dict[request["request_id"]] = model_status
            request["enable_thinking"] = model_status == "think_start"
        data_processor_logger.info(f"Processed request {request}")

        return request
||||
Reference in New Issue
Block a user