# FastDeploy/tests/operators/test_moe_top_k_select.py

# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
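
"""Unit test for the fastdeploy.model_executor.ops.gpu.moe_topk_select operator:
checks its fused and unfused outputs against a pure-Paddle top-k routing
reference, with and without a gate correction bias and weight normalization."""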
import unittest

import numpy as np
import paddle

from fastdeploy.model_executor.ops.gpu import moe_topk_select


class TestMoeTopkSelect(unittest.TestCase):
    def setUp(self):
        """
        Initialize the random seed and the routing problem size.
        """
        paddle.seed(2024)
        print(paddle.device.cuda.get_device_properties())
        print(paddle.__git_commit__)
        self.batch_size = 1500
        self.num_experts = 128
        self.top_k = 8

    def moe_topk_select_ref(
        self, gate_out: paddle.Tensor, bias: paddle.Tensor, top_k: int, apply_norm_weight: bool
    ):
        """Pure-Paddle reference implementation of top-k expert selection."""
        gate_out_after_softmax = paddle.nn.functional.softmax(gate_out, axis=-1)
        topk_weights_ref, topk_ids_ref = paddle.topk(gate_out_after_softmax, k=top_k, axis=-1)
        if bias is not None:
            # With a correction bias, experts are selected on the biased scores,
            # but the returned weights are the unbiased softmax values gathered
            # at the selected expert ids.
            gate_out_after_softmax_bias = gate_out_after_softmax + bias
            _, topk_ids_ref = paddle.topk(gate_out_after_softmax_bias, k=top_k, axis=-1)
            batch_indices = paddle.arange(gate_out.shape[0]).unsqueeze(-1).expand_as(topk_ids_ref)
            topk_weights_ref = gate_out_after_softmax.gather_nd(paddle.stack([batch_indices, topk_ids_ref], axis=-1))
        if apply_norm_weight:
            # Renormalize so the selected weights sum to 1 for each token.
            topk_weights_ref = topk_weights_ref / topk_weights_ref.sum(axis=-1, keepdim=True)
        return topk_ids_ref, topk_weights_ref

    def test_moe_topk_select(self):
        """
        Check moe_topk_select against the pure-Paddle reference for every
        combination of correction bias, weight normalization, and fused kernel.
        """
        gate_out = paddle.rand([self.batch_size, self.num_experts], dtype="float32")
        # Keep the correction bias small relative to the softmax scores.
        gate_correction_bias = paddle.rand([1, self.num_experts], dtype="float32")
        gate_correction_bias = gate_correction_bias / 10.0
        for apply_norm_weight in [True, False]:
            for bias in [None, gate_correction_bias]:
                topk_ids_ref, topk_weights_ref = self.moe_topk_select_ref(
                    gate_out, bias, self.top_k, apply_norm_weight
                )
                for fused in [True, False]:
                    topk_ids, topk_weights = moe_topk_select(
                        gate_out,
                        bias,
                        self.top_k,
                        apply_norm_weight,
                        fused,
                    )
                    # Expert ids are integral, so they must match exactly; the
                    # tolerances only matter for the floating-point weights.
                    np.testing.assert_allclose(
                        topk_ids_ref,
                        topk_ids,
                        rtol=1e-05,
                        atol=1e-05,
                    )
                    np.testing.assert_allclose(
                        topk_weights_ref,
                        topk_weights,
                        rtol=1e-05,
                        atol=1e-05,
                    )


if __name__ == "__main__":
    unittest.main()