Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-04 08:16:42 +08:00)
[Inference, rename] remove padding_offsets from atten use batch_id_per_token (#2880)
* remove padding_offsets from atten
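Every hunk below makes the same change: the per-token padding_offsets argument of the cascade append-attention kernels is replaced by a batch_id_per_token argument. The sketch that follows only illustrates the indexing difference this implies; the max_seq_len parameter and the padded-layout recovery formula on the old path are assumptions for illustration, not code taken from this diff.

// Illustrative sketch only: max_seq_len and the recovery formula below are
// assumptions, not taken from the FastDeploy sources.
#include <vector>

// Old style: padding_offsets maps a flattened token index back to its slot in a
// padded [batch, max_seq_len] layout; the batch id falls out of a division.
inline int batch_id_from_padding_offsets(int token_idx,
                                         const std::vector<int>& padding_offsets,
                                         int max_seq_len) {
  return (token_idx + padding_offsets[token_idx]) / max_seq_len;
}

// New style: the batch id is precomputed once per token and read back directly.
inline int batch_id_from_lookup(int token_idx,
                                const std::vector<int>& batch_id_per_token) {
  return batch_id_per_token[token_idx];
}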
@@ -40,7 +40,7 @@ void CascadeAppendAttentionC16Kernel(
     const paddle::Tensor& seq_lens_q,
     const paddle::Tensor& seq_lens_kv,
     const paddle::Tensor& seq_lens_encoder,
-    const paddle::Tensor& padding_offsets,
+    const paddle::Tensor& batch_id_per_token,
     const paddle::Tensor& cu_seqlens_q,
     const paddle::Tensor& block_table,
     const paddle::Tensor& batch_ids,
@@ -85,7 +85,7 @@ void CascadeAppendAttentionC8Kernel(
     const paddle::Tensor& seq_lens_q,
     const paddle::Tensor& seq_lens_kv,
     const paddle::Tensor& seq_lens_encoder,
-    const paddle::Tensor& padding_offsets,
+    const paddle::Tensor& batch_id_per_token,
     const paddle::Tensor& cu_seqlens_q,
     const paddle::Tensor& block_table,
     const paddle::Tensor& batch_ids,
@@ -130,7 +130,7 @@ void CascadeAppendAttentionC4Kernel(
     const paddle::Tensor& seq_lens_q,
     const paddle::Tensor& seq_lens_kv,
     const paddle::Tensor& seq_lens_encoder,
-    const paddle::Tensor& padding_offsets,
+    const paddle::Tensor& batch_id_per_token,
     const paddle::Tensor& cu_seqlens_q,
     const paddle::Tensor& block_table,
     const paddle::Tensor& batch_ids,
@@ -175,7 +175,7 @@ void CascadeAppendAttentionKernel(
     const paddle::Tensor& seq_lens_q,
     const paddle::Tensor& seq_lens_kv,
     const paddle::Tensor& seq_lens_encoder,
-    const paddle::Tensor& padding_offsets,
+    const paddle::Tensor& batch_id_per_token,
     const paddle::Tensor& cu_seqlens_q,
     const paddle::Tensor& block_table,
     const paddle::Tensor& batch_ids,
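With the three specialised kernels (C16/C8/C4) and the CascadeAppendAttentionKernel dispatcher all declaring batch_id_per_token in the slot that padding_offsets used to occupy, the tensor has to be supplied by the caller. One plausible way to build it on the host from the cu_seqlens_q argument the kernels already receive is sketched here; build_batch_id_per_token is a hypothetical helper for illustration and does not appear in this PR.

#include <vector>

// Hypothetical helper (not part of this PR): expands the cumulative query
// lengths into a per-token batch id map. Assumes cu_seqlens_q holds
// batch_size + 1 entries with cu_seqlens_q[0] == 0.
std::vector<int> build_batch_id_per_token(const std::vector<int>& cu_seqlens_q) {
  std::vector<int> batch_id_per_token(cu_seqlens_q.back());
  for (int b = 0; b + 1 < static_cast<int>(cu_seqlens_q.size()); ++b) {
    for (int t = cu_seqlens_q[b]; t < cu_seqlens_q[b + 1]; ++t) {
      batch_id_per_token[t] = b;  // token t belongs to sequence slot b
    }
  }
  return batch_id_per_token;
}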
@@ -211,7 +211,7 @@ void CascadeAppendAttentionKernel(
       seq_lens_q,
       seq_lens_kv,
       seq_lens_encoder,
-      padding_offsets,
+      batch_id_per_token,
       cu_seqlens_q,
       block_table,
       batch_ids,
@@ -246,7 +246,7 @@ void CascadeAppendAttentionKernel(
       seq_lens_q,
       seq_lens_kv,
       seq_lens_encoder,
-      padding_offsets,
+      batch_id_per_token,
      cu_seqlens_q,
       block_table,
       batch_ids,
@@ -281,7 +281,7 @@ void CascadeAppendAttentionKernel(
       seq_lens_q,
       seq_lens_kv,
       seq_lens_encoder,
-      padding_offsets,
+      batch_id_per_token,
       cu_seqlens_q,
       block_table,
       batch_ids,
@@ -316,7 +316,7 @@ void CascadeAppendAttentionKernel(
       seq_lens_q,
       seq_lens_kv,
       seq_lens_encoder,
-      padding_offsets,
+      batch_id_per_token,
       cu_seqlens_q,
       block_table,
       batch_ids,
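The four call-site hunks above (old lines 211, 246, 281 and 316) are the matching updates inside CascadeAppendAttentionKernel: the dispatcher now forwards batch_id_per_token instead of padding_offsets to each specialised kernel, keeping the declarations and the dispatch path consistent.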