[Metax] support cutlass moe & optimize flash attention (#4208)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled

This commit is contained in:
xiaozude
2025-09-29 11:22:43 +08:00
committed by GitHub
parent 2b2b645296
commit 7c919070f7
20 changed files with 2786 additions and 103 deletions

View File

@@ -597,6 +597,10 @@ elif paddle.device.is_compiled_with_custom_device("metax_gpu"):
"gpu_ops/moe/tritonmoe_preprocess.cu",
"gpu_ops/moe/moe_topk_select.cu",
"gpu_ops/recover_decode_task.cu",
"metax_ops/moe_dispatch.cu",
"metax_ops/moe_ffn.cu",
"metax_ops/moe_reduce.cu",
"metax_ops/fused_moe.cu",
]
sources += find_end_files("gpu_ops/speculate_decoding", ".cu")
@@ -617,7 +621,7 @@ elif paddle.device.is_compiled_with_custom_device("metax_gpu"):
],
},
library_dirs=[os.path.join(maca_path, "lib")],
-extra_link_args=["-lruntime_cu"],
+extra_link_args=["-lruntime_cu", "-lmctlassEx"],
include_dirs=[
os.path.join(maca_path, "include"),
os.path.join(maca_path, "include/mcr"),