[Inference, rename] remove padding_offsets from attention, use batch_id_per_token (#2880)

* remove padding_offsets from attention
Author: 周周周
Date: 2025-07-17 18:41:31 +08:00
Committed by: GitHub
Parent: d49f8fb30a
Commit: ddb10ac509
50 changed files with 311 additions and 288 deletions
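For context on what the renamed tensor holds, here is a minimal host-side sketch of how batch_id_per_token could be built from cu_seqlens_q (both names appear in the signatures below). The layout is an assumption, not code from this PR: cu_seqlens_q is taken to be the exclusive prefix sum of per-sequence query lengths over the packed, unpadded token stream, and batch_id_per_token maps each packed token index directly to the index of the sequence it belongs to.

```cpp
// Hedged sketch (not the actual FastDeploy code): build batch_id_per_token
// from cu_seqlens_q, assuming cu_seqlens_q has batch_size + 1 entries and its
// last entry is the total number of packed (unpadded) query tokens.
#include <cstdint>
#include <vector>

std::vector<int32_t> BuildBatchIdPerToken(const std::vector<int32_t>& cu_seqlens_q) {
  const int32_t batch_size = static_cast<int32_t>(cu_seqlens_q.size()) - 1;
  std::vector<int32_t> batch_id_per_token(cu_seqlens_q.back());
  for (int32_t b = 0; b < batch_size; ++b) {
    // Every packed token in [cu_seqlens_q[b], cu_seqlens_q[b + 1]) belongs to sequence b.
    for (int32_t t = cu_seqlens_q[b]; t < cu_seqlens_q[b + 1]; ++t) {
      batch_id_per_token[t] = b;
    }
  }
  return batch_id_per_token;
}
```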


@@ -40,7 +40,7 @@ void CascadeAppendAttentionC16Kernel(
const paddle::Tensor& seq_lens_q,
const paddle::Tensor& seq_lens_kv,
const paddle::Tensor& seq_lens_encoder,
-const paddle::Tensor& padding_offsets,
+const paddle::Tensor& batch_id_per_token,
const paddle::Tensor& cu_seqlens_q,
const paddle::Tensor& block_table,
const paddle::Tensor& batch_ids,
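
The motivation for the rename, sketched under the usual convention (an assumption, not taken from this diff) that the old padding_offsets mapped a packed token index back to its position in the padded layout, from which the batch id had to be recovered by dividing by a maximum sequence length, whereas batch_id_per_token stores the batch id directly. token_idx, max_seq_len, and the helper names are illustrative only.

```cpp
// Illustrative device-side lookups; the ori_token_idx / max_seq_len
// convention and the helper names are assumptions, not this PR's code.
__device__ int OldBatchId(const int* padding_offsets,
                          int token_idx,
                          int max_seq_len) {
  // Old scheme: recover the padded position, then divide to get the batch id.
  const int ori_token_idx = token_idx + padding_offsets[token_idx];
  return ori_token_idx / max_seq_len;
}

__device__ int NewBatchId(const int* batch_id_per_token, int token_idx) {
  // New scheme: the batch id is stored directly per packed token.
  return batch_id_per_token[token_idx];
}
```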
@@ -85,7 +85,7 @@ void CascadeAppendAttentionC8Kernel(
const paddle::Tensor& seq_lens_q,
const paddle::Tensor& seq_lens_kv,
const paddle::Tensor& seq_lens_encoder,
-const paddle::Tensor& padding_offsets,
+const paddle::Tensor& batch_id_per_token,
const paddle::Tensor& cu_seqlens_q,
const paddle::Tensor& block_table,
const paddle::Tensor& batch_ids,
@@ -130,7 +130,7 @@ void CascadeAppendAttentionC4Kernel(
const paddle::Tensor& seq_lens_q,
const paddle::Tensor& seq_lens_kv,
const paddle::Tensor& seq_lens_encoder,
-const paddle::Tensor& padding_offsets,
+const paddle::Tensor& batch_id_per_token,
const paddle::Tensor& cu_seqlens_q,
const paddle::Tensor& block_table,
const paddle::Tensor& batch_ids,
@@ -175,7 +175,7 @@ void CascadeAppendAttentionKernel(
const paddle::Tensor& seq_lens_q,
const paddle::Tensor& seq_lens_kv,
const paddle::Tensor& seq_lens_encoder,
-const paddle::Tensor& padding_offsets,
+const paddle::Tensor& batch_id_per_token,
const paddle::Tensor& cu_seqlens_q,
const paddle::Tensor& block_table,
const paddle::Tensor& batch_ids,
@@ -211,7 +211,7 @@ void CascadeAppendAttentionKernel(
seq_lens_q,
seq_lens_kv,
seq_lens_encoder,
-padding_offsets,
+batch_id_per_token,
cu_seqlens_q,
block_table,
batch_ids,
@@ -246,7 +246,7 @@ void CascadeAppendAttentionKernel(
seq_lens_q,
seq_lens_kv,
seq_lens_encoder,
-padding_offsets,
+batch_id_per_token,
cu_seqlens_q,
block_table,
batch_ids,
@@ -281,7 +281,7 @@ void CascadeAppendAttentionKernel(
seq_lens_q,
seq_lens_kv,
seq_lens_encoder,
-padding_offsets,
+batch_id_per_token,
cu_seqlens_q,
block_table,
batch_ids,
@@ -316,7 +316,7 @@ void CascadeAppendAttentionKernel(
seq_lens_q,
seq_lens_kv,
seq_lens_encoder,
-padding_offsets,
+batch_id_per_token,
cu_seqlens_q,
block_table,
batch_ids,