diff --git a/fastdeploy/model_executor/pre_and_post_process.py b/fastdeploy/model_executor/pre_and_post_process.py index f63453784..65948ea7d 100644 --- a/fastdeploy/model_executor/pre_and_post_process.py +++ b/fastdeploy/model_executor/pre_and_post_process.py @@ -196,7 +196,7 @@ def post_process_normal( """Post-processing steps after completing a single token generation.""" # handle vl: if model_output.think_end_id != -1: - thinking_mask = model_output.enable_thinking + thinking_mask = model_output.enable_thinking[: sampler_output.sampled_token_ids.shape[0]] exists_think_end = (sampler_output.sampled_token_ids == model_output.think_end_id) & thinking_mask paddle.assign( paddle.where(