【Hackathon 9th No.86】autogen MultiQueryDecoderAttention template_instantiation -part (#4383)

* split MultiQueryDecoderAttention template_instantiation

* update comment

* CI
This commit is contained in:
Zhenghai Zhang
2025-10-16 17:08:19 +08:00
committed by GitHub
parent f72be7a2c8
commit 6adfbe07ad
27 changed files with 3975 additions and 3836 deletions

View File

@@ -13,8 +13,8 @@
// limitations under the License.
#pragma once
#include "multi_head_latent_attention_kernel.h"
#include "helper.h"
#include "utils.cuh"
template <size_t vec_size, typename T>
struct softmax_state_t {