mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[XPU] Remove padding_offsets from get_padding_offset.cu (#2911)
This commit is contained in:
@@ -34,7 +34,7 @@ __global__ void RemovePadding(int64_t *output_data,
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void GetPaddingOffsetKernel(int *padding_offset,
|
||||
__global__ void GetPaddingOffsetKernel(int *batch_id_per_token,
|
||||
int *cum_offsets_out,
|
||||
int *cu_seqlens_q,
|
||||
int *cu_seqlens_k,
|
||||
@@ -46,7 +46,7 @@ __global__ void GetPaddingOffsetKernel(int *padding_offset,
|
||||
const int ti = threadIdx.x;
|
||||
int cum_offset = bi == 0 ? 0 : cum_offsets[bi - 1];
|
||||
for (int i = ti; i < seq_lens[bi]; i += blockDim.x) {
|
||||
padding_offset[bi * max_seq_len - cum_offset + i] = bi;
|
||||
batch_id_per_token[bi * max_seq_len - cum_offset + i] = bi;
|
||||
}
|
||||
if (ti == 0) {
|
||||
cum_offsets_out[bi] = cum_offset;
|
||||
@@ -75,7 +75,7 @@ std::vector<paddle::Tensor> GetPaddingOffset(const paddle::Tensor &input_ids,
|
||||
const int token_num_data = cpu_token_num.data<int64_t>()[0];
|
||||
auto x_remove_padding = paddle::empty(
|
||||
{token_num_data}, paddle::DataType::INT64, input_ids.place());
|
||||
auto padding_offset = paddle::empty(
|
||||
auto batch_id_per_token = paddle::empty(
|
||||
{token_num_data}, paddle::DataType::INT32, input_ids.place());
|
||||
auto cu_seqlens_q =
|
||||
paddle::full({bsz + 1}, 0, paddle::DataType::INT32, input_ids.place());
|
||||
@@ -87,7 +87,7 @@ std::vector<paddle::Tensor> GetPaddingOffset(const paddle::Tensor &input_ids,
|
||||
int blockSize = min((token_num_data + WARP_SIZE - 1) / WARP_SIZE * WARP_SIZE, 128);
|
||||
#endif
|
||||
GetPaddingOffsetKernel<<<bsz, 128, 0, cu_stream>>>(
|
||||
padding_offset.data<int>(),
|
||||
batch_id_per_token.data<int>(),
|
||||
cum_offsets_out.data<int>(),
|
||||
cu_seqlens_q.data<int>(),
|
||||
cu_seqlens_k.data<int>(),
|
||||
@@ -102,7 +102,7 @@ std::vector<paddle::Tensor> GetPaddingOffset(const paddle::Tensor &input_ids,
|
||||
seq_length);
|
||||
return {x_remove_padding,
|
||||
cum_offsets_out,
|
||||
padding_offset,
|
||||
batch_id_per_token,
|
||||
cu_seqlens_q,
|
||||
cu_seqlens_k}; // , enc_token_num, dec_token_num};
|
||||
}
|
||||
@@ -133,7 +133,7 @@ PD_BUILD_STATIC_OP(get_padding_offset)
|
||||
.Inputs({"input_ids", "token_num", "cum_offsets", "seq_len"})
|
||||
.Outputs({"x_remove_padding",
|
||||
"cum_offsets_out",
|
||||
"padding_offset",
|
||||
"batch_id_per_token",
|
||||
"cu_seqlens_q",
|
||||
"cu_seqlens_k"})
|
||||
.SetKernelFn(PD_KERNEL(GetPaddingOffset))
|
||||
|
||||
Reference in New Issue
Block a user