mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[Iluvatar GPU] Optimze attention and moe performance (#3234)
This commit is contained in:
@@ -539,9 +539,12 @@ elif paddle.is_compiled_with_custom_device("iluvatar_gpu"):
|
||||
"gpu_ops/stop_generation_multi_ends.cu",
|
||||
"gpu_ops/step.cu",
|
||||
"gpu_ops/token_penalty_multi_scores.cu",
|
||||
"gpu_ops/sample_kernels/rejection_top_p_sampling.cu",
|
||||
"gpu_ops/sample_kernels/top_k_renorm_probs.cu",
|
||||
"iluvatar_ops/moe_dispatch.cu",
|
||||
"iluvatar_ops/moe_reduce.cu",
|
||||
"iluvatar_ops/paged_attn.cu",
|
||||
"iluvatar_ops/w8a16_group_gemm.cu",
|
||||
"iluvatar_ops/runtime/iluvatar_context.cc",
|
||||
],
|
||||
include_dirs=["iluvatar_ops/runtime", "gpu_ops"],
|
||||
|
Reference in New Issue
Block a user