mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
fix split_rope_cache_kv_encoder in mix mtp (#5384)
This commit is contained in:
@@ -585,7 +585,8 @@ std::vector<paddle::Tensor> BlockAttnKernel(
|
||||
int,
|
||||
E_Scale>(
|
||||
xpu_ctx->x_context(),
|
||||
reinterpret_cast<const XPU_XType*>(qkv.data<data_t>()), // qkv
|
||||
reinterpret_cast<const XPU_XType*>(qkv.data<data_t>()) +
|
||||
total_enc_len * qkv_shape[qkv_shape.size() - 1], // qkv
|
||||
reinterpret_cast<const float*>(
|
||||
rotary_embs.data<float>()), // rotary_pos_emb
|
||||
reinterpret_cast<const int*>(
|
||||
|
||||
Reference in New Issue
Block a user