支持w4afp8 (#3324)

This commit is contained in:
yangjianfengo1
2025-08-11 19:00:18 +08:00
committed by GitHub
parent c7cb31051b
commit c7993d35cb
9 changed files with 1454 additions and 0 deletions

View File

@@ -494,6 +494,8 @@ elif paddle.is_compiled_with_cuda():
if cc >= 90 and nvcc_version >= 12.0:
    # Hopper optimized mla
    sources += find_end_files("gpu_ops/mla_attn", ".cu")

    # Run the w4afp8 GEMM kernel generator before collecting the sources
    # below (presumably it writes the .cu files into gpu_ops/w4afp8_gemm --
    # TODO confirm against utils/auto_gen_w4afp8_gemm_kernel.py).
    #
    # The script is launched with the interpreter that is running this build
    # rather than whatever "python" resolves to on PATH, and check=True makes
    # a non-zero exit status raise CalledProcessError instead of letting the
    # build continue silently with missing or stale generated sources.
    #
    # Aliased local imports keep this block self-contained without rebinding
    # any `subprocess` / `sys` names the surrounding file may already use.
    import subprocess as _subprocess
    import sys as _sys

    _subprocess.run(
        [_sys.executable, "utils/auto_gen_w4afp8_gemm_kernel.py"],
        check=True,
    )
    sources += find_end_files("gpu_ops/w4afp8_gemm", ".cu")
setup(
name="fastdeploy_ops",