mirror of https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-26 10:00:33 +08:00
support reasoning_max_tokens (#4207)
@@ -149,8 +149,6 @@ class EngineClient:
         task["prompt_token_ids_len"] = len(task["prompt_token_ids"])
         input_ids_len = task["prompt_token_ids_len"]
         task["max_tokens"] = min(self.max_model_len - input_ids_len, task.get("max_tokens"))
-        if task.get("reasoning_max_tokens", None) is None:
-            task["reasoning_max_tokens"] = max(int(task["max_tokens"] * 0.8), 1)
         min_tokens = task.get("min_tokens", 1)
         if "messages" in task:
             del task["messages"]
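This hunk removes the implicit default: previously, a request that omitted reasoning_max_tokens had it derived as 80% of max_tokens (floored at 1) inside EngineClient; after the change the field is simply left unset when the caller omits it. A minimal before/after sketch of that behavior (the bare task dict is illustrative; a real task carries more fields):

# Behavior removed by this commit: an omitted reasoning_max_tokens
# was silently derived from max_tokens.
task = {"max_tokens": 100}
if task.get("reasoning_max_tokens", None) is None:
    task["reasoning_max_tokens"] = max(int(task["max_tokens"] * 0.8), 1)
assert task["reasoning_max_tokens"] == 80

# After this commit: EngineClient leaves the key untouched.
task = {"max_tokens": 100}
assert "reasoning_max_tokens" not in task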
@@ -229,8 +227,8 @@ class EngineClient:
             raise ValueError(f"max_tokens can be defined [1, {self.max_model_len}).")

         if data.get("reasoning_max_tokens") is not None:
-            if data["reasoning_max_tokens"] > data["max_tokens"] or data["reasoning_max_tokens"] < 1:
-                raise ValueError("reasoning_max_tokens must be between max_tokens and 1")
+            if data["reasoning_max_tokens"] > data["max_tokens"] or data["reasoning_max_tokens"] < 0:
+                raise ValueError("reasoning_max_tokens must be between max_tokens and 0")

         if data.get("top_p") is not None:
             if data["top_p"] > 1 or data["top_p"] < 0:
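Together with the first hunk, this widens the accepted range of reasoning_max_tokens from [1, max_tokens] to [0, max_tokens], so an explicit 0 now passes validation. A standalone sketch of the new check (the function name and dict-based payload are illustrative, not part of the commit):

def validate_reasoning_max_tokens(data: dict) -> None:
    # Mirrors the post-commit check: None is allowed (no default is
    # applied any more); otherwise the value must lie in [0, max_tokens].
    if data.get("reasoning_max_tokens") is not None:
        if data["reasoning_max_tokens"] > data["max_tokens"] or data["reasoning_max_tokens"] < 0:
            raise ValueError("reasoning_max_tokens must be between max_tokens and 0")

validate_reasoning_max_tokens({"max_tokens": 512, "reasoning_max_tokens": 0})  # now valid
validate_reasoning_max_tokens({"max_tokens": 512})                             # unset: check skipped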