From e38709b499a9340c96195232fb7699adff57cd30 Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Wed, 10 Dec 2025 11:03:19 +0800
Subject: [PATCH] [BugFix] Fix limit_thinking early return logic in CUDA
 kernels (#5471)

* Initial plan

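* [BugFix] Fix limit_thinking bug - change AND to OR in condition checks

  The early-return guard in each kernel now fires when the request is
  already in the reply phase (limit_think_status == 2 in the v1 kernels,
  == 3 in the v2 kernels) or when its stop flag is set, instead of
  requiring both conditions at once.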

Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>

* Update Chinese comments to reflect OR logic instead of AND

Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>
---
 custom_ops/gpu_ops/limit_thinking_content_length_v1.cu        | 4 ++--
 custom_ops/gpu_ops/limit_thinking_content_length_v2.cu        | 4 ++--
 .../speculate_limit_thinking_content_length_v1.cu             | 4 ++--
 .../speculate_limit_thinking_content_length_v2.cu             | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu b/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu
index 45bf8f704..89e1e1c66 100644
--- a/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu
+++ b/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu
@@ -32,8 +32,8 @@ __global__ void limit_thinking_content_length_kernel_v1(
   const int max_think_len = max_think_lens[bid];
   if (max_think_len < 0) return;
   int current_limit_think_status = limit_think_status[bid];
-  // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行
-  if (current_limit_think_status == 2 && stop_flags[bid]) {
+  // 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行
+  if (current_limit_think_status == 2 || stop_flags[bid]) {
     return;
   }
 
diff --git a/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu b/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu
index ea5f8c9c4..8ff42058f 100644
--- a/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu
+++ b/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu
@@ -34,8 +34,8 @@ __global__ void limit_thinking_content_length_kernel_v2(
   const int max_think_len = max_think_lens[bid];
   if (max_think_len < 0) return;
   int current_limit_think_status = limit_think_status[bid];
-  // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行
-  if (current_limit_think_status == 3 && stop_flags[bid]) {
+  // 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行
+  if (current_limit_think_status == 3 || stop_flags[bid]) {
     return;
   }
 
diff --git a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu
index 0a703639c..a18f3b2bc 100644
--- a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu
+++ b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu
@@ -38,8 +38,8 @@ __global__ void speculate_limit_thinking_content_length_kernel_v1(
   const int max_think_len = max_think_lens[bid];
   if (max_think_len < 0) return;
   int current_limit_think_status = limit_think_status[bid];
-  // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行
-  if (current_limit_think_status == 2 && stop_flags[bid]) {
+  // 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行
+  if (current_limit_think_status == 2 || stop_flags[bid]) {
     return;
   }
 
diff --git a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu
index 709911d2b..270ab07e1 100644
--- a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu
+++ b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu
@@ -41,8 +41,8 @@ __global__ void speculate_limit_thinking_content_length_kernel_v2(
   const int max_think_len = max_think_lens[bid];
   if (max_think_len < 0) return;
   int current_limit_think_status = limit_think_status[bid];
-  // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行.
-  if (current_limit_think_status == 3 && stop_flags[bid]) {
+  // 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行.
+  if (current_limit_think_status == 3 || stop_flags[bid]) {
     return;
   }