Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
co63oc
2025-08-28 14:42:24 +08:00
committed by GitHub
parent c294fc8139
commit d4fc893fe3
3 changed files with 11 additions and 11 deletions

View File

@@ -67,8 +67,8 @@ class Test(unittest.TestCase):
baseline = paddle.add(baseline, bias)
out_type = a.dtype
c = cutlass_scaled_mm(a_q, b_q, a_scales, b_scales, out_type, bias)
euqal = np.allclose(baseline.numpy(), c.numpy(), rtol=1e-2, atol=1e-2)
print(euqal) #
equal = np.allclose(baseline.numpy(), c.numpy(), rtol=1e-2, atol=1e-2)
print(equal) #
def test_cutlass_scaled_mm_int8(self):
"""
@@ -90,8 +90,8 @@ class Test(unittest.TestCase):
baseline = paddle.add(baseline, bias)
out_type = a.dtype
c = cutlass_scaled_mm(a_q, b_q, a_scales, b_scales, out_type, bias)
euqal = np.allclose(baseline.numpy(), c.numpy(), rtol=1e-2, atol=1e-2)
print(euqal) #
equal = np.allclose(baseline.numpy(), c.numpy(), rtol=1e-2, atol=1e-2)
print(equal) #
if __name__ == "__main__":

View File

@@ -30,10 +30,10 @@ class TestW4AFP8GEMM(unittest.TestCase):
self.TokenPadding = 0
tokens = [self.tokens_per_group] * self.BATCH
self.tokens_perfix_sum = np.cumsum(tokens)
self.tokens_prefix_sum = np.cumsum(tokens)
self.tokens = paddle.to_tensor(tokens, dtype="int64")
self.tokens_perfix_sum = paddle.to_tensor(self.tokens_perfix_sum, dtype="int64")
self.tokens_prefix_sum = paddle.to_tensor(self.tokens_prefix_sum, dtype="int64")
self.all_tokens = int(self.tokens.sum())
self.input_fp8 = paddle.randn([self.all_tokens, self.K], dtype="bfloat16").astype(paddle.float8_e4m3fn)
@@ -81,7 +81,7 @@ class TestW4AFP8GEMM(unittest.TestCase):
out_cuda = w4afp8_gemm(
self.input_fp8,
weight_int4.cuda(),
self.tokens_perfix_sum,
self.tokens_prefix_sum,
self.input_row_sum.astype("float32"),
weight_dequant_scale.astype("float32"),
int(self.TokenPadding),

View File

@@ -109,11 +109,11 @@ class TestWFp8Afp8SparseGemm(unittest.TestCase):
TokenPadding = 0
tokens = [tokens_per_group] * BATCH
tokens_perfix_sum = np.cumsum(tokens)
tokens_perfix_sum = np.insert(tokens_perfix_sum, 0, 0)
tokens_prefix_sum = np.cumsum(tokens)
tokens_prefix_sum = np.insert(tokens_prefix_sum, 0, 0)
tokens = paddle.to_tensor(tokens, dtype="int32")
tokens_perfix_sum = paddle.to_tensor(tokens_perfix_sum, dtype="int32")
tokens_prefix_sum = paddle.to_tensor(tokens_prefix_sum, dtype="int32")
all_tokens = int(tokens.sum())
@@ -148,7 +148,7 @@ class TestWFp8Afp8SparseGemm(unittest.TestCase):
input_fp8,
convert_sparse_idx,
pack_weight.reshape([BATCH, N, K // 2]),
tokens_perfix_sum if TokenPadding == 0 else tokens,
tokens_prefix_sum if TokenPadding == 0 else tokens,
1 / weight_scale,
out_pd,
int(TokenPadding),