mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
Fix wrong batch size of thinking_mask (#4296)
Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com> Co-authored-by: xiegegege <46314656+xiegegege@users.noreply.github.com>
This commit is contained in:
@@ -196,7 +196,7 @@ def post_process_normal(
|
||||
"""Post-processing steps after completing a single token generation."""
|
||||
# handle vl:
|
||||
if model_output.think_end_id != -1:
|
||||
thinking_mask = model_output.enable_thinking
|
||||
thinking_mask = model_output.enable_thinking[: sampler_output.sampled_token_ids.shape[0]]
|
||||
exists_think_end = (sampler_output.sampled_token_ids == model_output.think_end_id) & thinking_mask
|
||||
paddle.assign(
|
||||
paddle.where(
|
||||
|
||||
Reference in New Issue
Block a user