From 1776d410d0ad6f46d30de2a0128e768fec70d450 Mon Sep 17 00:00:00 2001 From: Yuanle Liu Date: Wed, 10 Dec 2025 11:56:35 +0800 Subject: [PATCH] fix limit_thinking bug (#5469) --- custom_ops/gpu_ops/limit_thinking_content_length_v1.cu | 2 +- custom_ops/gpu_ops/limit_thinking_content_length_v2.cu | 2 +- .../speculate_limit_thinking_content_length_v1.cu | 2 +- .../speculate_limit_thinking_content_length_v2.cu | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu b/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu index 45bf8f704..9bfb31bee 100644 --- a/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu +++ b/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu @@ -33,7 +33,7 @@ __global__ void limit_thinking_content_length_kernel_v1( if (max_think_len < 0) return; int current_limit_think_status = limit_think_status[bid]; // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行 - if (current_limit_think_status == 2 && stop_flags[bid]) { + if (current_limit_think_status == 2 || stop_flags[bid]) { return; } diff --git a/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu b/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu index ea5f8c9c4..b261e01b2 100644 --- a/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu +++ b/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu @@ -35,7 +35,7 @@ __global__ void limit_thinking_content_length_kernel_v2( if (max_think_len < 0) return; int current_limit_think_status = limit_think_status[bid]; // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行 - if (current_limit_think_status == 3 && stop_flags[bid]) { + if (current_limit_think_status == 3 || stop_flags[bid]) { return; } diff --git a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu index 0a703639c..097d3429a 100644 --- a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu +++ b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu @@ -39,7 +39,7 @@ __global__ void speculate_limit_thinking_content_length_kernel_v1( if (max_think_len < 0) return; int current_limit_think_status = limit_think_status[bid]; // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行 - if (current_limit_think_status == 2 && stop_flags[bid]) { + if (current_limit_think_status == 2 || stop_flags[bid]) { return; } diff --git a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu index 709911d2b..8d963eb0c 100644 --- a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu +++ b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu @@ -42,7 +42,7 @@ __global__ void speculate_limit_thinking_content_length_kernel_v2( if (max_think_len < 0) return; int current_limit_think_status = limit_think_status[bid]; // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行. - if (current_limit_think_status == 3 && stop_flags[bid]) { + if (current_limit_think_status == 3 || stop_flags[bid]) { return; }