[Feature] Support limit thinking len for text models (#3527)

* support limit thinking len
* remove default think_end_id
* remove reasoning_max_tokens
* update think_end_id for ernie
* update think_end_id for ernie.

---------

Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com>
Co-authored-by: luukunn <981429396@qq.com>
2025-10-06 00:57:33 +08:00 · 2025-08-22 14:48:15 +08:00
parent 4d6fb96cd6
commit 93d999b830
6 changed files with 64 additions and 26 deletions
--- a/fastdeploy/model_executor/pre_and_post_process.py
+++ b/fastdeploy/model_executor/pre_and_post_process.py
@@ -160,7 +160,7 @@ def post_process_normal(
 ) -> ModelRunnerOutput:
    """Post-processing steps after completing a single token generation."""
    # handle vl:
-    if model_output.enable_thinking:
+    if model_output.enable_thinking and model_output.think_end_id is not None:
        exists_think_end = sampler_output.sampled_token_ids == model_output.think_end_id
        paddle.assign(
            paddle.where(