mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Feature] Support limit thinking len for text models (#3527)
* support limit thinking len
* remove default think_end_id
* remove reasoning_max_tokens
* update think_end_id for ernie
* update think_end_id for ernie.
---------
Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com>
Co-authored-by: luukunn <981429396@qq.com>
This commit is contained in:
@@ -121,8 +121,6 @@ class EngineClient:
|
||||
task["prompt_token_ids_len"] = len(task["prompt_token_ids"])
|
||||
input_ids_len = task["prompt_token_ids_len"]
|
||||
task["max_tokens"] = min(self.max_model_len - input_ids_len, task.get("max_tokens"))
|
||||
if task.get("reasoning_max_tokens", None) is None:
|
||||
task["reasoning_max_tokens"] = max(int(task["max_tokens"] * 0.8), 1)
|
||||
min_tokens = task.get("min_tokens", 1)
|
||||
if "messages" in task:
|
||||
del task["messages"]
|
||||
|
Reference in New Issue
Block a user