mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
【Hackathon 9th No.86】autogen MultiQueryDecoderAttention template_instantiation -part (#4383)
* split MultiQueryDecoderAttention template_instantiation * update comment * CI
This commit is contained in:
@@ -13,8 +13,8 @@
|
||||
// limitations under the License.
|
||||
#pragma once
|
||||
|
||||
|
||||
#include "multi_head_latent_attention_kernel.h"
|
||||
#include "helper.h"
|
||||
#include "utils.cuh"
|
||||
|
||||
template <size_t vec_size, typename T>
|
||||
struct softmax_state_t {
|
||||
|
||||
Reference in New Issue
Block a user