diff --git a/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu b/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu index 45bf8f704..89e1e1c66 100644 --- a/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu +++ b/custom_ops/gpu_ops/limit_thinking_content_length_v1.cu @@ -32,8 +32,8 @@ __global__ void limit_thinking_content_length_kernel_v1( const int max_think_len = max_think_lens[bid]; if (max_think_len < 0) return; int current_limit_think_status = limit_think_status[bid]; - // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行 - if (current_limit_think_status == 2 && stop_flags[bid]) { + // 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行 + if (current_limit_think_status == 2 || stop_flags[bid]) { return; } diff --git a/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu b/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu index ea5f8c9c4..8ff42058f 100644 --- a/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu +++ b/custom_ops/gpu_ops/limit_thinking_content_length_v2.cu @@ -34,8 +34,8 @@ __global__ void limit_thinking_content_length_kernel_v2( const int max_think_len = max_think_lens[bid]; if (max_think_len < 0) return; int current_limit_think_status = limit_think_status[bid]; - // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行 - if (current_limit_think_status == 3 && stop_flags[bid]) { + // 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行 + if (current_limit_think_status == 3 || stop_flags[bid]) { return; } diff --git a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu index 0a703639c..a18f3b2bc 100644 --- a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu +++ b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v1.cu @@ -38,8 +38,8 @@ __global__ void speculate_limit_thinking_content_length_kernel_v1( const int max_think_len = max_think_lens[bid]; if (max_think_len < 0) return; int current_limit_think_status = limit_think_status[bid]; - // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行 - if (current_limit_think_status == 2 && stop_flags[bid]) { + // 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行 + if (current_limit_think_status == 2 || stop_flags[bid]) { return; } diff --git a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu index 709911d2b..270ab07e1 100644 --- a/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu +++ b/custom_ops/gpu_ops/speculate_decoding/speculate_limit_thinking_content_length_v2.cu @@ -41,8 +41,8 @@ __global__ void speculate_limit_thinking_content_length_kernel_v2( const int max_think_len = max_think_lens[bid]; if (max_think_len < 0) return; int current_limit_think_status = limit_think_status[bid]; - // 如果在回复阶段, 且已经触发停止标志, 则直接返回, 无需多余执行. - if (current_limit_think_status == 3 && stop_flags[bid]) { + // 如果在回复阶段, 或者已经触发停止标志, 则直接返回, 无需多余执行. + if (current_limit_think_status == 3 || stop_flags[bid]) { return; }