[BugFix] Fix limit_thinking early return logic in CUDA kernels (#5471)

* Initial plan

* [BugFix] Fix limit_thinking bug - change AND to OR in condition checks

Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>

* Update Chinese comments to reflect OR logic instead of AND

Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: yuanlehome <23653004+yuanlehome@users.noreply.github.com>
This commit is contained in:
Copilot
2025-12-10 11:03:19 +08:00
committed by GitHub
parent 53460935ec
commit e38709b499
4 changed files with 8 additions and 8 deletions

View File

@@ -32,8 +32,8 @@ __global__ void limit_thinking_content_length_kernel_v1(
const int max_think_len = max_think_lens[bid];
if (max_think_len < 0) return;
int current_limit_think_status = limit_think_status[bid];
-// 如果在回复阶段, 已经触发停止标志, 则直接返回, 无需多余执行
-if (current_limit_think_status == 2 && stop_flags[bid]) {
+// 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行
+if (current_limit_think_status == 2 || stop_flags[bid]) {
return;
}

View File

@@ -34,8 +34,8 @@ __global__ void limit_thinking_content_length_kernel_v2(
const int max_think_len = max_think_lens[bid];
if (max_think_len < 0) return;
int current_limit_think_status = limit_think_status[bid];
-// 如果在回复阶段, 已经触发停止标志, 则直接返回, 无需多余执行
-if (current_limit_think_status == 3 && stop_flags[bid]) {
+// 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行
+if (current_limit_think_status == 3 || stop_flags[bid]) {
return;
}

View File

@@ -38,8 +38,8 @@ __global__ void speculate_limit_thinking_content_length_kernel_v1(
const int max_think_len = max_think_lens[bid];
if (max_think_len < 0) return;
int current_limit_think_status = limit_think_status[bid];
-// 如果在回复阶段, 已经触发停止标志, 则直接返回, 无需多余执行
-if (current_limit_think_status == 2 && stop_flags[bid]) {
+// 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行
+if (current_limit_think_status == 2 || stop_flags[bid]) {
return;
}

View File

@@ -41,8 +41,8 @@ __global__ void speculate_limit_thinking_content_length_kernel_v2(
const int max_think_len = max_think_lens[bid];
if (max_think_len < 0) return;
int current_limit_think_status = limit_think_status[bid];
-// 如果在回复阶段, 已经触发停止标志, 则直接返回, 无需多余执行.
-if (current_limit_think_status == 3 && stop_flags[bid]) {
+// 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行.
+if (current_limit_think_status == 3 || stop_flags[bid]) {
return;
}