Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-10 11:00:19 +08:00
[UnitTest][MTP]add test_speculate_get_output_padding_offset (#3740)
tests/operators/test_speculate_get_output_padding_offset.py (new file, 53 lines)
@@ -0,0 +1,53 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np
import paddle

from fastdeploy.model_executor.ops.gpu import speculate_get_output_padding_offset


class TestSpeculateGetOutputPaddingOffset(unittest.TestCase):
    def test_speculate_get_output_padding_offset(self):
        bsz = 256
        max_seq_len = 8192

        seq_lens_output = np.random.randint(0, 4, size=bsz)
        output_token_num = np.sum(seq_lens_output)

        seq_lens_output = paddle.to_tensor(seq_lens_output, dtype="int32")
        out_token_num = paddle.sum(seq_lens_output).astype("int32")
        output_cum_offsets_tmp = paddle.cumsum(max_seq_len - seq_lens_output).astype("int32")

        output_padding_offset_gpu, output_cum_offsets_gpu = speculate_get_output_padding_offset(
            output_cum_offsets_tmp, out_token_num, seq_lens_output, max_seq_len
        )

        output_padding_offset_ref = [-1] * output_token_num
        output_cum_offsets_ref = [-1] * bsz

        for bi in range(bsz):
            cum_offset = 0 if bi == 0 else output_cum_offsets_tmp[bi - 1]
            output_cum_offsets_ref[bi] = cum_offset
            for token_i in range(seq_lens_output[bi]):
                output_padding_offset_ref[bi * max_seq_len - cum_offset + token_i] = cum_offset

        np.testing.assert_allclose(output_padding_offset_gpu, output_padding_offset_ref)
        np.testing.assert_allclose(output_cum_offsets_gpu, output_cum_offsets_ref)


if __name__ == "__main__":
    unittest.main()
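
For context (not part of the committed file): the reference loop in the test maps each packed output token to the cumulative padding that precedes its request, so a packed index can be turned back into a padded index. Below is a minimal NumPy sketch of that same mapping, using small illustrative values (3 requests, max_seq_len=4) instead of the test's 256 and 8192; the variable names here are illustrative, only speculate_get_output_padding_offset is the op under test.

import numpy as np

max_seq_len = 4
seq_lens_output = np.array([2, 1, 3])                      # tokens produced per request
padding = max_seq_len - seq_lens_output                    # unused padded slots per request
cum_offsets_tmp = np.cumsum(padding)                       # padding up to and including each request
cum_offsets = np.concatenate(([0], cum_offsets_tmp[:-1]))  # padding before each request
total_tokens = seq_lens_output.sum()

padding_offset = np.empty(total_tokens, dtype=np.int32)
for bi, n in enumerate(seq_lens_output):
    start = bi * max_seq_len - cum_offsets[bi]             # packed index of this request's first token
    padding_offset[start:start + n] = cum_offsets[bi]

print(cum_offsets)     # [0 2 5]
print(padding_offset)  # [0 0 2 5 5 5]
# padded position of packed token i is i + padding_offset[i]

The sketch only restates the test's Python reference; the GPU op is expected to return the same two arrays for int32 inputs.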