polish code with new pre-commit rule (#2923)

This commit is contained in:
Zero Rains
2025-07-19 23:19:27 +08:00
committed by GitHub
parent b8676d71a8
commit 25698d56d1
424 changed files with 14307 additions and 13518 deletions

View File

@@ -19,7 +19,7 @@
// #define DEBUG_EAGLE_KERNEL
__global__ void ComputeOrderKernel(
const int* seq_lens_this_time,
const int* seq_lens_this_time,
const int* seq_lens_encoder,
const int* base_model_seq_lens_this_time,
const int* base_model_seq_lens_encoder,
@@ -47,7 +47,7 @@ __global__ void ComputeOrderKernel(
printf("batch %d: cur_seq_lens_encoder > 0 \n", i);
#endif
for (int j = 0; j < cur_seq_lens_encoder; j++) {
position_map[in_offset++] = out_offset++;
position_map[in_offset++] = out_offset++;
}
// 2. base model encoder. Base step=0
} else if (cur_base_model_seq_lens_encoder != 0) {
@@ -69,13 +69,13 @@ __global__ void ComputeOrderKernel(
in_offset += cur_base_model_seq_lens_this_time;
} else /*Accept all draft tokens*/ {
#ifdef DEBUG_EAGLE_KERNEL
printf("batch %d: accept_num > actual_draft_token_num \n", i);
printf("batch %d: accept_num > actual_draft_token_num \n", i);
#endif
position_map[in_offset + accept_num - 2] = out_offset++;
position_map[in_offset + accept_num - 1] = out_offset++;
in_offset += cur_base_model_seq_lens_this_time;
}
}
}
}
output_token_num[0] = out_offset;
#ifdef DEBUG_EAGLE_KERNEL
@@ -208,7 +208,7 @@ std::vector<paddle::Tensor> EagleGetHiddenStates(
}
case paddle::DataType::BFLOAT16: {
return DispatchDtype<paddle::DataType::BFLOAT16>(
input,
input,
seq_lens_this_time,
seq_lens_encoder,
seq_lens_decoder,

View File

@@ -72,7 +72,7 @@ __global__ void computeOrderKernel(
output_token_num[0] = out_offset;
#ifdef DEBUG_EAGLE_KERNEL
printf("position map output_token_num%d:\n", output_token_num[0]);
for (int i = 0; i < output_token_num[0]; i++) {
for (int i = 0; i < output_token_num[0]; i++) {
printf("%d ", src_map[i]);
}
printf("\n");
@@ -187,4 +187,4 @@ PD_BUILD_STATIC_OP(eagle_get_self_hidden_states)
"seq_lens_this_time",
"step_idx"})
.Outputs({"out"})
.SetKernelFn(PD_KERNEL(EagleGetSelfHiddenStates));
.SetKernelFn(PD_KERNEL(EagleGetSelfHiddenStates));

View File

@@ -26,7 +26,7 @@ __global__ void RebuildAppendPaddingKernel(
const int seq_len,
const int dim_embed,
const size_t elem_nums) {
using LoadT = AlignedVector<T, VecSize>;
using LoadT = AlignedVector<T, VecSize>;
LoadT src_vec;
const int64_t global_idx = blockDim.x * blockIdx.x + threadIdx.x;
for (int64_t i = global_idx * VecSize; i < elem_nums; i += gridDim.x * blockDim.x * VecSize) {
@@ -42,7 +42,7 @@ __global__ void RebuildAppendPaddingKernel(
const int input_token_id = ori_token_id - cum_offset[bi] + seq_id;
const int bias_idx = i % dim_embed;
Load<T, VecSize>(&full_hidden_states[input_token_id * dim_embed + bias_idx], &src_vec);
Store<T, VecSize>(src_vec, &out[i]);
}
@@ -78,14 +78,14 @@ std::vector<paddle::Tensor> DispatchDtype(
GetNumBlocks(pack_num, &grid_size);
RebuildAppendPaddingKernel<DataType_, PackSize><<<grid_size, threads_per_block, 0, full_hidden_states.stream()>>>(
reinterpret_cast<DataType_*>(out.data<data_t>()),
reinterpret_cast<const DataType_*>(full_hidden_states.data<data_t>()),
cum_offsets.data<int32_t>(),
seq_len_encoder.data<int32_t>(),
seq_len_decoder.data<int32_t>(),
output_padding_offset.data<int32_t>(),
max_seq_len,
dim_embed,
reinterpret_cast<DataType_*>(out.data<data_t>()),
reinterpret_cast<const DataType_*>(full_hidden_states.data<data_t>()),
cum_offsets.data<int32_t>(),
seq_len_encoder.data<int32_t>(),
seq_len_decoder.data<int32_t>(),
output_padding_offset.data<int32_t>(),
max_seq_len,
dim_embed,
elem_nums);
return {out};
}
@@ -99,7 +99,7 @@ std::vector<paddle::Tensor> RebuildAppendPadding(
const paddle::Tensor& output_padding_offset,
const int max_seq_len) {
switch (full_hidden_states.dtype()) {
case paddle::DataType::BFLOAT16:
return DispatchDtype<paddle::DataType::BFLOAT16>(
@@ -137,7 +137,7 @@ std::vector<paddle::DataType> RebuildAppendPaddingInferDtype(
PD_BUILD_STATIC_OP(speculate_rebuild_append_padding)
.Inputs({"full_hidden_states",
.Inputs({"full_hidden_states",
"cum_offsets",
"seq_len_encoder",
"seq_len_decoder",
@@ -146,4 +146,4 @@ PD_BUILD_STATIC_OP(speculate_rebuild_append_padding)
.Outputs({"out"})
.SetKernelFn(PD_KERNEL(RebuildAppendPadding))
.SetInferShapeFn(PD_INFER_SHAPE(RebuildAppendPaddingInferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(RebuildAppendPaddingInferDtype));
.SetInferDtypeFn(PD_INFER_DTYPE(RebuildAppendPaddingInferDtype));

View File

@@ -93,7 +93,7 @@ __global__ void speculate_free_and_reschedule(bool *stop_flags,
used_list_len[tid] = 0;
}
} else if (seq_lens_this_time[tid] != 0 && max_possible_block_idx < block_num_per_seq &&
block_table_now[(seq_lens_decoder[tid] + max_draft_tokens +
block_table_now[(seq_lens_decoder[tid] + max_draft_tokens +
1) /
block_size] == -1) {
// 统计需要分配block的位置和总数
@@ -347,7 +347,7 @@ PD_BUILD_STATIC_OP(speculate_step_reschedule)
"next_tokens",
"first_token_ids",
"accept_num"})
.Attrs({"block_size: int",
.Attrs({"block_size: int",
"encoder_decoder_block_num: int",
"max_draft_tokens: int"})
.Outputs({"stop_flags_out",