diff --git a/fastdeploy/model_executor/pre_and_post_process.py b/fastdeploy/model_executor/pre_and_post_process.py
index f63453784..65948ea7d 100644
--- a/fastdeploy/model_executor/pre_and_post_process.py
+++ b/fastdeploy/model_executor/pre_and_post_process.py
@@ -196,7 +196,7 @@ def post_process_normal(
     """Post-processing steps after completing a single token generation."""
     # handle vl:
     if model_output.think_end_id != -1:
-        thinking_mask = model_output.enable_thinking
+        thinking_mask = model_output.enable_thinking[: sampler_output.sampled_token_ids.shape[0]]
         exists_think_end = (sampler_output.sampled_token_ids == model_output.think_end_id) & thinking_mask
         paddle.assign(
             paddle.where(