diff --git a/test/layers/test_append_attention.py b/test/layers/test_append_attention.py
index 6a7832575..764191a7b 100644
--- a/test/layers/test_append_attention.py
+++ b/test/layers/test_append_attention.py
@@ -352,6 +352,11 @@ class TestAppendGroupQueryAttnWithRope(unittest.TestCase):
         self.max_dec_len_this_time = paddle.to_tensor([self.max_dec_len_this_time], "int32", place=paddle.CPUPlace())
         self.seq_lens_this_time = self.seq_lens_encoder
 
+        self.decoder_batch_ids = paddle.full([self.batch_size], 0, dtype="int32")
+        self.decoder_tile_ids_per_batch = paddle.full([self.batch_size], 0, dtype="int32")
+        self.decoder_num_blocks_cpu = paddle.full([1], 0, dtype="int32").pin_memory()
+        self.max_len_tensor_cpu = paddle.full([8], 0, dtype="int32").cpu()
+
         self.cache_shape = (
             self.max_block_num,
             self.kv_num_head,
@@ -414,16 +419,15 @@ class TestAppendGroupQueryAttnWithRope(unittest.TestCase):
             kv_batch_ids,
             kv_tile_ids_per_batch,
             kv_num_blocks,
-            decoder_batch_ids,
-            decoder_tile_ids_per_batch,
-            decoder_num_blocks,
             max_len_kv,
-            set_max_lengths,
         ) = get_block_shape_and_split_kv_block(
             self.seq_lens_encoder,
             self.seq_lens_decoder,
             self.seq_lens_this_time,
-            self.cum_offset,
+            self.decoder_batch_ids,
+            self.decoder_tile_ids_per_batch,
+            self.decoder_num_blocks_cpu,
+            self.max_len_tensor_cpu,
             64,
             12,
             (self.q_num_head + 2 * self.kv_num_head) // self.kv_num_head,
@@ -454,10 +458,10 @@ class TestAppendGroupQueryAttnWithRope(unittest.TestCase):
             kv_batch_ids,
             kv_tile_ids_per_batch,
             kv_num_blocks,
-            decoder_batch_ids,
-            decoder_tile_ids_per_batch,
-            decoder_num_blocks,
-            set_max_lengths,
+            self.decoder_batch_ids,
+            self.decoder_tile_ids_per_batch,
+            self.decoder_num_blocks_cpu,
+            self.max_len_tensor_cpu,
             max_len_kv,
             self.rope_emb,  # rope_emb
             None,  # attn_mask
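
Note on the change (not part of the diff itself): get_block_shape_and_split_kv_block no longer returns the decoder scheduling metadata (decoder_batch_ids, decoder_tile_ids_per_batch, decoder_num_blocks, set_max_lengths). Instead, the test pre-allocates those buffers in setUp and passes them both into get_block_shape_and_split_kv_block and into the attention call, which presumably fill them in place. A minimal sketch of the caller-side pre-allocation, using the shapes and dtypes shown in the diff; batch_size is a hypothetical stand-in for self.batch_size:

import paddle

batch_size = 8  # hypothetical value for illustration; the test uses self.batch_size

# Per-request decoder scheduling buffers; assumed to be written in place by the op.
decoder_batch_ids = paddle.full([batch_size], 0, dtype="int32")
decoder_tile_ids_per_batch = paddle.full([batch_size], 0, dtype="int32")

# Host-side buffers (pinned memory / CPU), matching the setUp changes in the diff,
# so the scheduling results are readable on the host.
decoder_num_blocks_cpu = paddle.full([1], 0, dtype="int32").pin_memory()
max_len_tensor_cpu = paddle.full([8], 0, dtype="int32").cpu()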