[Feature] Support limiting thinking length for text models (#3527)

* support limiting thinking length

* remove default think_end_id

* remove reasoning_max_tokens

* update think_end_id for ernie
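
The feature these commits implement — limiting how long a model may "think" before answering — can be pictured as a decode loop that force-emits the think-end token once the reasoning-token budget is spent. Below is a minimal sketch under that assumption; the loop shape and the names step and decode_with_thinking_limit are illustrative, not FastDeploy's actual implementation:

    # Illustrative sketch only: force-close the reasoning segment once the
    # thinking-token budget is exhausted. All names here are hypothetical.
    def decode_with_thinking_limit(step, prompt_ids, think_end_id, reasoning_max_tokens, max_tokens):
        """step(ids) -> next token id; returns the generated token sequence."""
        out = list(prompt_ids)
        in_thinking = True
        thinking_used = 0
        for _ in range(max_tokens):
            if in_thinking and thinking_used >= reasoning_max_tokens:
                tok = think_end_id  # budget spent: force the think-end token
            else:
                tok = step(out)
            out.append(tok)
            if in_thinking:
                thinking_used += 1
                if tok == think_end_id:
                    in_thinking = False  # visible answer starts here
        return out

    # e.g. with a dummy "model" that never closes the thinking span on its own:
    ids = decode_with_thinking_limit(lambda ids: 7, [], think_end_id=2,
                                     reasoning_max_tokens=4, max_tokens=8)
    # -> [7, 7, 7, 7, 2, 7, 7, 7]  (the fifth token is the forced think-end)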

---------

Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com>
Co-authored-by: luukunn <981429396@qq.com>
Author: K11OntheBoat
Date: 2025-08-22 14:48:15 +08:00
Committed by: GitHub
Parent: 4d6fb96cd6
Commit: 93d999b830
6 changed files with 64 additions and 26 deletions

@@ -246,6 +246,10 @@ class ErnieMoEVLProcessor(ErnieProcessor):
             request["prompt_token_ids"] = request["prompt_token_ids"][: max_model_len - 1]
         if request.get("max_tokens") is None:
             request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"]))
+        else:
+            request["max_tokens"] = min(max_model_len - len(request["prompt_token_ids"]), request["max_tokens"])
+        if not request.get("reasoning_max_tokens"):
+            request["reasoning_max_tokens"] = max(int(request["max_tokens"] * 0.8), 1)
         data_processor_logger.info(f"Processed request {request}")
         return request
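
For reference, the token budgeting this hunk adds can be exercised in isolation. The sketch below restates the same logic as a standalone helper, assuming request is a plain dict; apply_token_budget is a hypothetical name, not a function in this repository:

    # Standalone restatement of the budgeting in the hunk above (name hypothetical).
    def apply_token_budget(request: dict, max_model_len: int) -> dict:
        """Cap max_tokens to the remaining context window, then default
        reasoning_max_tokens to 80% of max_tokens when the caller left it unset."""
        remaining = max_model_len - len(request["prompt_token_ids"])
        if request.get("max_tokens") is None:
            request["max_tokens"] = max(1, remaining)
        else:
            request["max_tokens"] = min(remaining, request["max_tokens"])
        if not request.get("reasoning_max_tokens"):
            request["reasoning_max_tokens"] = max(int(request["max_tokens"] * 0.8), 1)
        return request

    # Example: a 100-token prompt in a 2048-token window with max_tokens=512
    # keeps max_tokens=512 and defaults reasoning_max_tokens to 409 (80% of 512).
    print(apply_token_budget({"prompt_token_ids": [0] * 100, "max_tokens": 512}, 2048))

The 0.8 ratio and the floor of 1 mirror the diff: a request that never sets reasoning_max_tokens always receives a nonzero thinking budget strictly smaller than its overall generation budget.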