From aaa2de1afaa609869f73c9b208a142006ed52ab5 Mon Sep 17 00:00:00 2001
From: co63oc
Date: Wed, 3 Sep 2025 22:21:02 +0800
Subject: [PATCH] [UnitTest][MTP]add test_speculate_get_padding_offset (#3730)

---
 .../test_speculate_get_padding_offset.py      | 143 ++++++++++++++++++
 .../test_speculate_get_seq_lens_output.py     |  60 ++++++++
 2 files changed, 203 insertions(+)
 create mode 100644 tests/operators/test_speculate_get_padding_offset.py
 create mode 100644 tests/operators/test_speculate_get_seq_lens_output.py

diff --git a/tests/operators/test_speculate_get_padding_offset.py b/tests/operators/test_speculate_get_padding_offset.py
new file mode 100644
index 000000000..a9e0b3031
--- /dev/null
+++ b/tests/operators/test_speculate_get_padding_offset.py
@@ -0,0 +1,143 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+import paddle
+
+from fastdeploy.model_executor.ops.gpu import speculate_get_padding_offset
+
+
+def ref_speculate_get_padding_offset(cum_offsets, seq_lens, max_seq_len, token_num_data):
+    bsz = seq_lens.shape[0]
+
+    padding_offset = np.zeros([token_num_data], dtype=np.int32)
+    batch_id_per_token = np.zeros([token_num_data], dtype=np.int32)
+    cum_offsets_out = np.zeros([bsz], dtype=np.int32)
+    cu_seqlens_q = np.zeros([bsz + 1], dtype=np.int32)
+    cu_seqlens_k = np.zeros([bsz + 1], dtype=np.int32)
+
+    modified_indices = {
+        "padding_offset": [],
+        "cum_offsets_out": [],
+        "cu_seqlens_q": [],
+        "cu_seqlens_k": [],
+    }
+
+    cu_seqlens_q[0] = 0
+    cu_seqlens_k[0] = 0
+    modified_indices["cu_seqlens_q"].append(0)
+    modified_indices["cu_seqlens_k"].append(0)
+
+    for bi in range(bsz):
+        cum_offset = 0 if bi == 0 else cum_offsets[bi - 1]
+        cum_offsets_out[bi] = cum_offset
+        modified_indices["cum_offsets_out"].append(bi)
+
+        for i in range(seq_lens[bi]):
+            idx = bi * max_seq_len - cum_offset + i
+            if idx >= 0 and idx < token_num_data:
+                if idx == 0:
+                    print(idx, bi, cum_offset)
+                padding_offset[idx] = cum_offset
+                batch_id_per_token[idx] = bi
+                modified_indices["padding_offset"].append(idx)
+
+        cum_seq_len = (bi + 1) * max_seq_len - cum_offsets[bi]
+        cu_seqlens_q[bi + 1] = cum_seq_len
+        cu_seqlens_k[bi + 1] = cum_seq_len
+        modified_indices["cu_seqlens_q"].append(bi + 1)
+        modified_indices["cu_seqlens_k"].append(bi + 1)
+
+    return (
+        padding_offset,
+        cum_offsets_out,
+        cu_seqlens_q,
+        cu_seqlens_k,
+        modified_indices,
+        batch_id_per_token,
+    )
+
+
+class TestSpeculateGetPaddingOffset(unittest.TestCase):
+    def test_speculate_get_padding_offset(self):
+        test_case = {
+            "bsz": 4,
+            "max_seq_len": 10,
+            "token_num_data": 32,
+            "cum_offsets": np.array([2, 5, 8, 12], dtype=np.int32),
+            "seq_lens": np.array([8, 5, 7, 6], dtype=np.int32),
+            "seq_lens_encoder": np.array([1, 0, 1, 0], dtype=np.int32),
+        }
+
+        max_draft_tokens = 4
+
+        input_ids = np.random.randint(0, 1000, (test_case["bsz"], test_case["max_seq_len"]), dtype=np.int64)
+        draft_tokens = np.random.randint(0, 1000, (test_case["bsz"], max_draft_tokens), dtype=np.int64)
+        token_num = np.array([test_case["token_num_data"]], dtype=np.int64)
+
+        input_ids_tensor = paddle.to_tensor(input_ids)
+        draft_tokens_tensor = paddle.to_tensor(draft_tokens)
+        cum_offsets_tensor = paddle.to_tensor(test_case["cum_offsets"])
+        seq_lens_tensor = paddle.to_tensor(test_case["seq_lens"])
+        seq_lens_encoder_tensor = paddle.to_tensor(test_case["seq_lens_encoder"])
+        token_num_tensor = paddle.to_tensor(token_num)
+
+        (
+            x_remove_padding,
+            batch_id_per_token,
+            cu_seqlens_q,
+            cu_seqlens_k,
+        ) = speculate_get_padding_offset(
+            input_ids_tensor,
+            draft_tokens_tensor,
+            cum_offsets_tensor,
+            token_num_tensor,
+            seq_lens_tensor,
+            seq_lens_encoder_tensor,
+        )
+
+        (
+            ref_padding_offset,
+            ref_cum_offsets_out,
+            ref_cu_seqlens_q,
+            ref_cu_seqlens_k,
+            modified_indices,
+            ref_batch_id_per_token,
+        ) = ref_speculate_get_padding_offset(
+            test_case["cum_offsets"],
+            test_case["seq_lens"],
+            test_case["max_seq_len"],
+            test_case["token_num_data"],
+        )
+
+        output_arrays = {
+            "batch_id_per_token": batch_id_per_token.numpy(),
+            "cu_seqlens_q": cu_seqlens_q.numpy(),
+            "cu_seqlens_k": cu_seqlens_k.numpy(),
+        }
+
+        ref_arrays = {
+            "batch_id_per_token": ref_batch_id_per_token,
+            "cu_seqlens_q": ref_cu_seqlens_q,
+            "cu_seqlens_k": ref_cu_seqlens_k,
+        }
+
+        for key in output_arrays:
+            np.testing.assert_allclose(output_arrays[key], ref_arrays[key])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/operators/test_speculate_get_seq_lens_output.py b/tests/operators/test_speculate_get_seq_lens_output.py
new file mode 100644
index 000000000..86db4e8fc
--- /dev/null
+++ b/tests/operators/test_speculate_get_seq_lens_output.py
@@ -0,0 +1,60 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+import paddle
+
+from fastdeploy.model_executor.ops.gpu import speculate_get_seq_lens_output
+
+
+class TestSpeculateGetSeqLensOutput(unittest.TestCase):
+
+    def run_seq_lens(self, input_values):
+        paddle.seed(42)
+        np.random.seed(42)
+        seq_lens_this_time = paddle.to_tensor(input_values[0], dtype="int32")
+        seq_lens_encoder = paddle.to_tensor(input_values[1], dtype="int32")
+        seq_lens_decoder = paddle.to_tensor(input_values[2], dtype="int32")
+        seq_lens_output = speculate_get_seq_lens_output(seq_lens_this_time, seq_lens_encoder, seq_lens_decoder)[0]
+        return seq_lens_output
+
+    def test_speculate_get_seq_lens_output1(self):
+        input_values = [[7], [0], [0]]
+        output_value = 7
+        result = self.run_seq_lens(input_values)
+        np.testing.assert_allclose(result.numpy(), output_value)
+
+    def test_speculate_get_seq_lens_output2(self):
+        input_values = [[7], [1], [0]]
+        output_value = 1
+        result = self.run_seq_lens(input_values)
+        np.testing.assert_allclose(result.numpy(), output_value)
+
+    def test_speculate_get_seq_lens_output3(self):
+        input_values = [[1], [1], [0]]
+        output_value = 1
+        result = self.run_seq_lens(input_values)
+        np.testing.assert_allclose(result.numpy(), output_value)
+
+    def test_speculate_get_seq_lens_output4(self):
+        input_values = [[0], [1], [0]]
+        output_value = 0
+        result = self.run_seq_lens(input_values)
+        np.testing.assert_allclose(result.numpy(), output_value)
+
+
+if __name__ == "__main__":
+    unittest.main()