Remove the batch_id_per_token argument from append_decode_cache_int8_rope_qk_norm_kernel (#3963)

* Update decoder_write_cache_with_rope_kernel.cu: delete batch_id_per_token
* Update decoder_write_cache_with_rope_impl.cuh
* Update test_append_attention.py
* Update test_append_attention.py
2025-12-24 13:28:13 +08:00 · 2025-09-08 21:58:34 +08:00
parent 08b3153661
commit f12159b630
3 changed files with 7 additions and 10 deletions
--- a/custom_ops/gpu_ops/append_attn/decoder_write_cache_with_rope_impl.cuh
+++ b/custom_ops/gpu_ops/append_attn/decoder_write_cache_with_rope_impl.cuh
@@ -684,7 +684,6 @@ __global__ void append_decode_cache_int8_rope_qk_norm_kernel(
                                        // block_size, head_size // 2]
    T* __restrict__ qkv_out,
    const int* __restrict__ block_tables,     // [bsz, max_blocks_per_seq]
-    const int* __restrict__ batch_id_per_token,  // [num_tokens]
    const int* __restrict__ cu_seqlens_q,
    const int* __restrict__ seq_lens,          // [bsz]
    const int* __restrict__ seq_lens_encoder,  // [bsz]
--- a/custom_ops/gpu_ops/append_attn/decoder_write_cache_with_rope_kernel.cu
+++ b/custom_ops/gpu_ops/append_attn/decoder_write_cache_with_rope_kernel.cu
@@ -565,7 +565,6 @@ void DecoderWriteCacheWithRoPEKernel(
              value_cache_out->data<uint8_t>(),
              reinterpret_cast<DataType_*>(qkv_out->data<T>()),
              block_tables.data<int>(),
-              batch_id_per_token.data<int>(),
              cu_seqlens_q.data<int>(),
              seq_lens.data<int>(),
              seq_lens_encoder.data<int>(),
@@ -729,7 +728,6 @@ void DecoderWriteCacheWithRoPEKernel(
              value_cache_out->data<uint8_t>(),
              reinterpret_cast<DataType_*>(qkv_out->data<T>()),
              block_tables.data<int>(),
-              batch_id_per_token.data<int>(),
              cu_seqlens_q.data<int>(),
              seq_lens.data<int>(),
              seq_lens_encoder.data<int>(),