mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
【Hackathon 9th No.86】autogen MultiQueryAppendC8Attention template_instantiation -part (#4330)
* split MultiQueryAppendC8Attention template_instantiation
* update setup_ops.py
* fix ci
* fix bug
This commit is contained in:
@@ -377,6 +377,7 @@ elif paddle.is_compiled_with_cuda():
|
||||
|
||||
if cc >= 80:
|
||||
# append_attention
|
||||
os.system("python gpu_ops/append_attn/autogen_template_instantiation.py")
|
||||
sources += ["gpu_ops/append_attention.cu"]
|
||||
sources += find_end_files("gpu_ops/append_attn", ".cu")
|
||||
# mla
|
||||
|
||||
Reference in New Issue
Block a user