【bug fix】修复w4a8编译慢 (#3510)

* 修复w4a8编译

* code style

* 修复tma copy
This commit is contained in:
yangjianfengo1
2025-08-21 18:50:14 +08:00
committed by GitHub
parent a5692e8b7d
commit e5aa7087db
3 changed files with 9 additions and 54 deletions

View File

@@ -83,14 +83,9 @@ void w4afp8_gemm_M{M}_N{N}_TAILN{TAILN}_K{K}_B{BATCH}_P{PADDING}_{TYPE}(
}}
"""
# Case table: each entry is a four-element list of integers. The meaning of
# the individual positions is not defined in the visible lines -- presumably
# they fill placeholders of the kernel-name template shown in the hunk header
# (M/N/TAILN/K/BATCH/PADDING/TYPE); TODO confirm against the consuming loop.
gemm_case = [
[8192, 3584, 8, 0], # eb45T ffn1
[8192, 3584, 8, 2048], # eb45T ffn1
[7168, 8192, 8, 0], # eb45T ffn2
[7168, 8192, 8, 2048], # eb45T ffn2
]
# NOTE(review): this rebinds gemm_case, so as written the four-entry table
# above is discarded and only this single entry remains. This text looks like
# a diff view whose +/- markers were lost, so one of the two assignments is
# presumably the removed side of the change -- confirm against the real file
# before relying on either value.
gemm_case = [[256, 256, 1, 0]]
# Data-type tags as strings; how they are consumed is not visible here.
dtype = ["BF16", "FP16"]
# NOTE(review): same pattern as gemm_case -- this rebinding leaves only
# "BF16" in effect as written. Presumably one of the two dtype lines is the
# pre-change version; verify which one the commit actually keeps.
dtype = ["BF16"]
def get_cutlass_type(type):