Mirror of https://github.com/PaddlePaddle/FastDeploy.git
fix typos (#3633)
Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
@@ -67,8 +67,8 @@ class Test(unittest.TestCase):
         baseline = paddle.add(baseline, bias)
         out_type = a.dtype
         c = cutlass_scaled_mm(a_q, b_q, a_scales, b_scales, out_type, bias)
-        euqal = np.allclose(baseline.numpy(), c.numpy(), rtol=1e-2, atol=1e-2)
-        print(euqal)  #
+        equal = np.allclose(baseline.numpy(), c.numpy(), rtol=1e-2, atol=1e-2)
+        print(equal)  #

     def test_cutlass_scaled_mm_int8(self):
         """
@@ -90,8 +90,8 @@ class Test(unittest.TestCase):
         baseline = paddle.add(baseline, bias)
         out_type = a.dtype
         c = cutlass_scaled_mm(a_q, b_q, a_scales, b_scales, out_type, bias)
-        euqal = np.allclose(baseline.numpy(), c.numpy(), rtol=1e-2, atol=1e-2)
-        print(euqal)  #
+        equal = np.allclose(baseline.numpy(), c.numpy(), rtol=1e-2, atol=1e-2)
+        print(equal)  #


 if __name__ == "__main__":
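Both hunks compare the fused cutlass_scaled_mm result against a paddle baseline with np.allclose(rtol=1e-2, atol=1e-2). As a minimal sketch of what that tolerance check accepts (array values invented for illustration, not taken from the test):

```python
import numpy as np

# np.allclose(a, b, rtol, atol) passes when, for every element,
# |a - b| <= atol + rtol * |b|.  With rtol = atol = 1e-2 this allows roughly
# 1% relative error plus a small absolute slack, a typical bound when checking
# a quantized GEMM against a higher-precision baseline.
baseline = np.array([1.000, 2.000, -3.000])    # illustrative values only
quantized = np.array([1.004, 1.985, -3.020])

equal = np.allclose(baseline, quantized, rtol=1e-2, atol=1e-2)
print(equal)  # True: every |difference| stays within 1e-2 + 1e-2 * |quantized|
```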
@@ -30,10 +30,10 @@ class TestW4AFP8GEMM(unittest.TestCase):
         self.TokenPadding = 0

         tokens = [self.tokens_per_group] * self.BATCH
-        self.tokens_perfix_sum = np.cumsum(tokens)
+        self.tokens_prefix_sum = np.cumsum(tokens)

         self.tokens = paddle.to_tensor(tokens, dtype="int64")
-        self.tokens_perfix_sum = paddle.to_tensor(self.tokens_perfix_sum, dtype="int64")
+        self.tokens_prefix_sum = paddle.to_tensor(self.tokens_prefix_sum, dtype="int64")
         self.all_tokens = int(self.tokens.sum())

         self.input_fp8 = paddle.randn([self.all_tokens, self.K], dtype="bfloat16").astype(paddle.float8_e4m3fn)
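The renamed tokens_prefix_sum holds the cumulative token counts used by the grouped GEMM test. A minimal sketch of what np.cumsum produces here (counts invented; the test derives them from self.tokens_per_group and self.BATCH):

```python
import numpy as np

# Per-group token counts -> the end row offset of each group inside the
# stacked [all_tokens, K] activation matrix that the test builds.
tokens = [3, 5, 2]                      # invented example counts
tokens_prefix_sum = np.cumsum(tokens)   # array([ 3,  8, 10])

# Group 0 owns rows 0..2, group 1 rows 3..7, group 2 rows 8..9.
print(tokens_prefix_sum)
```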
@@ -81,7 +81,7 @@ class TestW4AFP8GEMM(unittest.TestCase):
         out_cuda = w4afp8_gemm(
             self.input_fp8,
             weight_int4.cuda(),
-            self.tokens_perfix_sum,
+            self.tokens_prefix_sum,
             self.input_row_sum.astype("float32"),
             weight_dequant_scale.astype("float32"),
             int(self.TokenPadding),
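For orientation, a conceptual numpy reference (assumed semantics and invented shapes, not the w4afp8_gemm kernel itself) of how such cumulative offsets delimit a grouped GEMM: each group's slice of the stacked activations is multiplied by that group's own weight matrix.

```python
import numpy as np

# Conceptual reference only: a grouped GEMM driven by cumulative row offsets.
tokens = [3, 5, 2]
offsets = np.cumsum(tokens)                                       # [3, 8, 10]
K, N = 4, 6
x = np.random.randn(int(np.sum(tokens)), K).astype(np.float32)   # stacked activations
w = np.random.randn(len(tokens), K, N).astype(np.float32)        # one weight per group

out = np.empty((x.shape[0], N), dtype=np.float32)
start = 0
for g, end in enumerate(offsets):
    out[start:end] = x[start:end] @ w[g]   # this group's rows x this group's weight
    start = int(end)
print(out.shape)                           # (10, 6)
```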
@@ -109,11 +109,11 @@ class TestWFp8Afp8SparseGemm(unittest.TestCase):
         TokenPadding = 0

         tokens = [tokens_per_group] * BATCH
-        tokens_perfix_sum = np.cumsum(tokens)
-        tokens_perfix_sum = np.insert(tokens_perfix_sum, 0, 0)
+        tokens_prefix_sum = np.cumsum(tokens)
+        tokens_prefix_sum = np.insert(tokens_prefix_sum, 0, 0)

         tokens = paddle.to_tensor(tokens, dtype="int32")
-        tokens_perfix_sum = paddle.to_tensor(tokens_perfix_sum, dtype="int32")
+        tokens_prefix_sum = paddle.to_tensor(tokens_prefix_sum, dtype="int32")

         all_tokens = int(tokens.sum())

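Unlike the W4AFP8 test above, the sparse-GEMM test also prepends a zero with np.insert, so the cumulative sums become start/end offsets. A minimal sketch (counts invented):

```python
import numpy as np

# With a leading zero, group i owns the half-open row range
# [offsets[i], offsets[i + 1]).
tokens = [3, 5, 2]
offsets = np.insert(np.cumsum(tokens), 0, 0)   # array([ 0,  3,  8, 10])

for i in range(len(tokens)):
    start, end = int(offsets[i]), int(offsets[i + 1])
    print(f"group {i}: rows {start}..{end - 1}")
```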
@@ -148,7 +148,7 @@ class TestWFp8Afp8SparseGemm(unittest.TestCase):
             input_fp8,
             convert_sparse_idx,
             pack_weight.reshape([BATCH, N, K // 2]),
-            tokens_perfix_sum if TokenPadding == 0 else tokens,
+            tokens_prefix_sum if TokenPadding == 0 else tokens,
             1 / weight_scale,
             out_pd,
             int(TokenPadding),
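On the renamed line the kernel receives either the cumulative offsets or the raw per-group counts, depending on TokenPadding; what it does with each form is not shown in this diff. A small sketch of just that selection (counts invented):

```python
import numpy as np

# The argument switches form with TokenPadding, exactly as in the test.
tokens = np.array([3, 5, 2], dtype=np.int32)
tokens_prefix_sum = np.insert(np.cumsum(tokens), 0, 0).astype(np.int32)

TokenPadding = 0
arg = tokens_prefix_sum if TokenPadding == 0 else tokens
print(arg)   # [ 0  3  8 10] here; would be [3 5 2] if TokenPadding != 0
```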