【Hackathon 9th No.86】autogen MultiQueryAppendC8Attention template_instantiation -part (#4330)

* split MultiQueryAppendC8Attention template_instantiation

* update setup_ops.py

* fix CI

* fix bug
This commit is contained in:
Zhenghai Zhang
2025-10-10 15:07:48 +08:00
committed by GitHub
parent c4ebaf8a07
commit c46d5e48f8
12 changed files with 2275 additions and 2282 deletions

View File

@@ -377,6 +377,7 @@ elif paddle.is_compiled_with_cuda():
if cc >= 80:
# append_attention
os.system("python gpu_ops/append_attn/autogen_template_instantiation.py")
sources += ["gpu_ops/append_attention.cu"]
sources += find_end_files("gpu_ops/append_attn", ".cu")
# mla