【Hackathon 9th No.24】add rebuild_padding (#4107)
tests/operators/test_rebuild_padding.py | 200 lines (new file)

@@ -0,0 +1,200 @@
import unittest

import numpy as np
import paddle

from fastdeploy.model_executor.ops.gpu import rebuild_padding

def RebuildPaddingKernel(
    out,
    tmp_out,
    cu_seqlens_q,
    seq_len_this_time,
    seq_lens_decoder,
    seq_lens_encoder,
    bsz,
):
    # No-offset reference: emit one row per batch. Decode batches copy their
    # single current token (seq_id = 0); prefill batches copy the last prompt
    # token (seq_id = seq_lens_encoder[bi] - 1). Idle batches are skipped.
    for bi in range(bsz):
        seq_id = 0
        if seq_len_this_time[bi] == 0:
            continue
        if seq_lens_decoder[bi] == 0 and seq_lens_encoder[bi] == 0:
            continue
        if seq_lens_encoder[bi] > 0:
            seq_id = seq_lens_encoder[bi] - 1
        out[bi] = tmp_out[cu_seqlens_q[bi] + seq_id][:]

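# A quick trace with the data used in the tests below: batch 0 is a decode step
# (seq_lens_decoder[0] = 21, seq_len_this_time[0] = 1), so out[0] copies
# tmp_out[cu_seqlens_q[0] + 0] = tmp_out[0]; batch 1 is a prefill step
# (seq_lens_encoder[1] = 20), so out[1] copies tmp_out[1 + 19] = tmp_out[20],
# the hidden state of its last prompt token.
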
def RebuildAppendPaddingKernel(
    out,
    tmp_out,
    cu_seqlens_q,
    seq_len_this_time,
    seq_lens_decoder,
    seq_lens_encoder,
    output_padding_offset,
    max_input_length,
    token_num,
    need_delete_token_num,
):
    # Offset reference: iterate over the surviving output tokens and map each
    # back to its source row in the packed tmp_out buffer.
    for token_id in range(token_num - need_delete_token_num):
        bi = token_id // max_input_length
        if seq_len_this_time[bi] == 0 or (seq_lens_decoder[bi] == 0 and seq_lens_encoder[bi] == 0):
            continue
        # Position of this token before the padding rows were deleted.
        ori_token_id = token_id + output_padding_offset[token_id]
        seq_id = 0
        if seq_lens_encoder[bi] > 0:
            seq_id = seq_lens_encoder[bi] - 1
        # Offset between batch bi's padded block and its packed block.
        cum_offset_bi = bi * max_input_length - cu_seqlens_q[bi]
        input_token_id = ori_token_id - cum_offset_bi + seq_id
        out[token_id] = tmp_out[input_token_id][:]


def rebuild_padding_ref(
    tmp_out,  # [token_num, dim_embed]
    cu_seqlens_q,  # [bsz + 1]
    seq_len_this_time,
    seq_lens_decoder,
    seq_lens_encoder,
    output_padding_offset,
    max_input_length,
):
    # NumPy reference implementation of the rebuild_padding GPU op.
    token_num, dim_embed = tmp_out.shape
    bsz = cu_seqlens_q.shape[0] - 1

    if output_padding_offset is not None:
        # Each prefill batch contributes seq_lens_encoder[i] tokens to tmp_out
        # but only its last token to the output; the rest are deleted.
        need_delete_token_num = 0
        for i in range(bsz):
            if seq_lens_encoder[i] > 0:
                need_delete_token_num += seq_lens_encoder[i] - 1
        out = np.zeros([token_num - need_delete_token_num, dim_embed], dtype=tmp_out.dtype)
        RebuildAppendPaddingKernel(
            out,
            tmp_out,
            cu_seqlens_q,
            seq_len_this_time,
            seq_lens_decoder,
            seq_lens_encoder,
            output_padding_offset,
            max_input_length,
            token_num,
            need_delete_token_num,
        )
    else:
        out = np.zeros([bsz, dim_embed], dtype=tmp_out.dtype)
        RebuildPaddingKernel(
            out,
            tmp_out,
            cu_seqlens_q,
            seq_len_this_time,
            seq_lens_decoder,
            seq_lens_encoder,
            bsz,
        )
    return out

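# A minimal shape check for the reference path (illustrative values, not the
# test data used below): two prefill sequences of three tokens each, packed
# into a 6-token buffer.
#
#   tmp_out           = np.arange(12, dtype=np.float32).reshape(6, 2)
#   cu_seqlens_q      = np.array([0, 3, 6], dtype=np.int32)
#   seq_len_this_time = np.array([3, 3], dtype=np.int32)
#   seq_lens_encoder  = np.array([3, 3], dtype=np.int32)
#   seq_lens_decoder  = np.array([0, 0], dtype=np.int32)
#
# rebuild_padding_ref(tmp_out, cu_seqlens_q, seq_len_this_time,
#                     seq_lens_decoder, seq_lens_encoder, None, 8)
# returns a [2, 2] array holding rows tmp_out[2] and tmp_out[5], i.e. the
# last prompt token of each sequence.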

class TestRebuildPadding(unittest.TestCase):
    # Test without output_padding_offset.
    def test_rebuild_padding_no_offset(self):
        token_num = 100
        dim_embed = 256
        max_input_length = 512
        # tmp_out: [token_num, dim_embed]
        tmp_out = np.random.randn(token_num, dim_embed).astype(np.float32)
        # cu_seqlens_q: [bsz + 1], cumulative token counts per batch. Eight
        # batches alternating decode (1 token) and prefill (20 tokens); the
        # 84 tokens it covers fit inside the 100-token tmp_out buffer.
        cu_seqlens_q = np.array([0, 1, 21, 22, 42, 43, 63, 64, 84], dtype=np.int32)

        # Simulated sequence-length information.
        seq_len_this_time = np.array([1, 20, 1, 20, 1, 20, 1, 20], dtype=np.int32)
        seq_lens_encoder = np.array([0, 20, 0, 20, 0, 20, 0, 20], dtype=np.int32)
        seq_lens_decoder = np.array([21, 0, 21, 0, 21, 0, 21, 0], dtype=np.int32)
        out_no_offset_ref = rebuild_padding_ref(
            tmp_out=tmp_out,
            cu_seqlens_q=cu_seqlens_q,
            seq_len_this_time=seq_len_this_time,
            seq_lens_decoder=seq_lens_decoder,
            seq_lens_encoder=seq_lens_encoder,
            output_padding_offset=None,
            max_input_length=max_input_length,
        )

        tmp_out = paddle.to_tensor(tmp_out)
        cu_seqlens_q = paddle.to_tensor(cu_seqlens_q)
        seq_len_this_time = paddle.to_tensor(seq_len_this_time)
        seq_lens_decoder = paddle.to_tensor(seq_lens_decoder)
        seq_lens_encoder = paddle.to_tensor(seq_lens_encoder)

        out_no_offset = rebuild_padding(
            tmp_out,
            cu_seqlens_q,
            seq_len_this_time,
            seq_lens_decoder,
            seq_lens_encoder,
            None,
            max_input_length,
        )
        # The op copies rows verbatim, so the comparison is exact.
        np.testing.assert_allclose(out_no_offset.numpy(), out_no_offset_ref)

    # Test with output_padding_offset.
    def test_rebuild_padding_with_offset(self):
        np.random.seed(42)  # seed NumPy, which generates all the random inputs
        token_num = 100
        dim_embed = 256
        max_input_length = 512
        # tmp_out: [token_num, dim_embed]
        tmp_out = np.random.randn(token_num, dim_embed).astype(np.float32)
        # cu_seqlens_q: [bsz + 1], same eight-batch layout as above.
        cu_seqlens_q = np.array([0, 1, 21, 22, 42, 43, 63, 64, 84], dtype=np.int32)

        # Simulated sequence-length information.
        seq_len_this_time = np.array([1, 20, 1, 20, 1, 20, 1, 20], dtype=np.int32)
        seq_lens_encoder = np.array([0, 20, 0, 20, 0, 20, 0, 20], dtype=np.int32)
        seq_lens_decoder = np.array([21, 0, 21, 0, 21, 0, 21, 0], dtype=np.int32)

        # 100 tokens minus 76 deleted prefill tokens leaves 24 output tokens,
        # so 80 offsets are more than enough.
        num_output_tokens = 80
        output_padding_offset = np.random.randint(0, 10, [num_output_tokens], dtype=np.int32)
        out_with_offset_ref = rebuild_padding_ref(
            tmp_out=tmp_out,
            cu_seqlens_q=cu_seqlens_q,
            seq_len_this_time=seq_len_this_time,
            seq_lens_decoder=seq_lens_decoder,
            seq_lens_encoder=seq_lens_encoder,
            output_padding_offset=output_padding_offset,
            max_input_length=max_input_length,
        )

        tmp_out = paddle.to_tensor(tmp_out)
        cu_seqlens_q = paddle.to_tensor(cu_seqlens_q)
        seq_len_this_time = paddle.to_tensor(seq_len_this_time)
        seq_lens_decoder = paddle.to_tensor(seq_lens_decoder)
        seq_lens_encoder = paddle.to_tensor(seq_lens_encoder)
        output_padding_offset = paddle.to_tensor(output_padding_offset)
        out_with_offset = rebuild_padding(
            tmp_out,
            cu_seqlens_q,
            seq_len_this_time,
            seq_lens_decoder,
            seq_lens_encoder,
            output_padding_offset,
            max_input_length,
        )
        np.testing.assert_allclose(out_with_offset.numpy(), out_with_offset_ref)


if __name__ == "__main__":
    unittest.main()
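The test can be run directly with `python tests/operators/test_rebuild_padding.py`; note that it needs a CUDA build of FastDeploy, since `rebuild_padding` is imported from `fastdeploy.model_executor.ops.gpu`.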