[BugFix] fix thinking bug (#4710)

* fix thinking bug
* fix ut
* update
* fix
2025-12-24 13:28:13 +08:00 · 2025-10-31 22:00:31 +08:00
parent 10358bf1a0
commit b301bd6c31
8 changed files with 458 additions and 290 deletions
--- a/custom_ops/gpu_ops/cpp_extensions.cc
+++ b/custom_ops/gpu_ops/cpp_extensions.cc
@@ -987,12 +987,15 @@ void LimitThinkingContentLengthV1(const paddle::Tensor& next_tokens,
                                  const paddle::Tensor& max_think_lens,
                                  const paddle::Tensor& step_idx,
                                  const paddle::Tensor& limit_think_status,
+                                  const paddle::Tensor& stop_flags,
+                                  const paddle::Tensor& eos_token_ids,
                                  const int64_t think_end_id);

 void LimitThinkingContentLengthV2(const paddle::Tensor& next_tokens,
                                  const paddle::Tensor& max_think_lens,
                                  const paddle::Tensor& step_idx,
                                  const paddle::Tensor& limit_think_status,
+                                  const paddle::Tensor& stop_flags,
                                  const int64_t think_end_id,
                                  const int64_t line_break_id);

@@ -1003,6 +1006,8 @@ void SpeculateLimitThinkingContentLengthV1(
    const paddle::Tensor& limit_think_status,
    const paddle::Tensor& accept_num,
    const paddle::Tensor& seq_lens_decoder,
+    const paddle::Tensor& stop_flags,
+    const paddle::Tensor& eos_token_ids,
    const int64_t think_end_id);

 void SpeculateLimitThinkingContentLengthV2(
@@ -1012,6 +1017,7 @@ void SpeculateLimitThinkingContentLengthV2(
    const paddle::Tensor& limit_think_status,
    const paddle::Tensor& accept_num,
    const paddle::Tensor& seq_lens_decoder,
+    const paddle::Tensor& stop_flags,
    const int64_t think_end_id,
    const int64_t line_break_id);