【Hackathon 9th No.86】autogen MultiQueryDecoderAttention template_instantiation -part (#4383)

* split MultiQueryDecoderAttention template_instantiation
* update comment
* CI
2025-12-24 13:28:13 +08:00 · 2025-10-16 17:08:19 +08:00
parent f72be7a2c8
commit 6adfbe07ad
27 changed files with 3975 additions and 3836 deletions
--- a/custom_ops/gpu_ops/append_attn/decode_attention_func.cuh
+++ b/custom_ops/gpu_ops/append_attn/decode_attention_func.cuh
@@ -13,8 +13,8 @@
 // limitations under the License.
 #pragma once

-
-#include "multi_head_latent_attention_kernel.h"
+#include "helper.h"
+#include "utils.cuh"

 template <size_t vec_size, typename T>
 struct softmax_state_t {