[New Feature] fa3 支持flash mask (#3184)

* 支持flash mask

* 修改test_flash_mask

* 修改test.sh
This commit is contained in:
yangjianfengo1
2025-08-05 12:20:48 +08:00
committed by GitHub
parent b8f3c73aac
commit 40f7f3e0d8
8 changed files with 1702 additions and 0 deletions

View File

@@ -497,6 +497,7 @@ elif paddle.is_compiled_with_cuda():
if cc >= 90 and nvcc_version >= 12.0:
# Hopper optimized mla
sources += find_end_files("gpu_ops/mla_attn", ".cu")
sources += ["gpu_ops/flash_mask_attn/flash_mask_attn.cu"]
setup(
name="fastdeploy_ops",