[Iluvatar GPU] Optimize attention and moe performance (#3234)

This commit is contained in:
yzwu
2025-08-08 10:51:24 +08:00
committed by GitHub
parent 37569cca86
commit fbdd6b0663
24 changed files with 1130 additions and 1653 deletions

View File

@@ -539,9 +539,12 @@ elif paddle.is_compiled_with_custom_device("iluvatar_gpu"):
"gpu_ops/stop_generation_multi_ends.cu",
"gpu_ops/step.cu",
"gpu_ops/token_penalty_multi_scores.cu",
"gpu_ops/sample_kernels/rejection_top_p_sampling.cu",
"gpu_ops/sample_kernels/top_k_renorm_probs.cu",
"iluvatar_ops/moe_dispatch.cu",
"iluvatar_ops/moe_reduce.cu",
"iluvatar_ops/paged_attn.cu",
"iluvatar_ops/w8a16_group_gemm.cu",
"iluvatar_ops/runtime/iluvatar_context.cc",
],
include_dirs=["iluvatar_ops/runtime", "gpu_ops"],