add dtype int32 (#3692)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled

This commit is contained in:
Ryan
2025-08-29 14:56:35 +08:00
committed by GitHub
parent 1bf4fc7f36
commit 45f81b34f0
7 changed files with 9 additions and 9 deletions

View File

@@ -30,7 +30,7 @@ def test_speculate_get_output_padding_offset():
     seq_lens_output = paddle.to_tensor(seq_lens_output, dtype="int32")
     out_token_num = paddle.sum(seq_lens_output)
-    output_cum_offsets_tmp = paddle.cumsum(max_seq_len - seq_lens_output)
+    output_cum_offsets_tmp = paddle.cumsum(max_seq_len - seq_lens_output, dtype="int32")
     output_padding_offset_xpu, output_cum_offsets_xpu = speculate_get_output_padding_offset(
         output_cum_offsets_tmp, out_token_num, seq_lens_output, max_seq_len

View File

@@ -397,7 +397,7 @@ class EngineSevice:
     image_type_ids = paddle.to_tensor(inputs["image_type_ids"], dtype="int32")
     image_mask = input_ids == self.data_processor.image_patch_id
     image_token_sum = paddle.full(shape=[len(input_ids) + 1], fill_value=0, dtype="int32")
-    image_token_sum[1:] = paddle.cumsum(image_mask.cast("int32"))
+    image_token_sum[1:] = paddle.cumsum(image_mask.cast("int32"), dtype="int32")
     grid_thw = []
     for one in inputs["grid_thw"]:
         if one[0] == 1:

View File

@@ -257,7 +257,7 @@ def remove_padding(
         - The key sequence lengths (paddle.Tensor).
     """
     if current_platform.is_cuda():
-        cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
+        cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time, dtype="int32")
         token_num = paddle.sum(seq_lens_this_time)
         (
             ids_remove_padding,
@@ -301,7 +301,7 @@ def speculate_remove_padding(
         - Key sequence lengths (paddle.Tensor).
     """
     if current_platform.is_cuda():
-        cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
+        cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time, dtype="int32")
         token_num = paddle.sum(seq_lens_this_time)
         (
             ids_remove_padding,

View File

@@ -106,7 +106,7 @@ def pre_process(
""" """
# Remove padding # Remove padding
max_len = input_ids.shape[1] max_len = input_ids.shape[1]
cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time) cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time, dtype="int32")
token_num = paddle.sum(seq_lens_this_time) token_num = paddle.sum(seq_lens_this_time)
output_padding_offset = None output_padding_offset = None
output_cum_offsets = None output_cum_offsets = None
@@ -132,7 +132,7 @@ def pre_process(
         if isinstance(seq_lens_output, list):
             seq_lens_output = seq_lens_output[0]
         output_token_num = paddle.sum(seq_lens_output)
-        output_cum_offsets_tmp = paddle.cumsum(max_len - seq_lens_output)
+        output_cum_offsets_tmp = paddle.cumsum(max_len - seq_lens_output, dtype="int32")
         output_padding_offset, output_cum_offsets = speculate_get_output_padding_offset(
             output_cum_offsets_tmp,
             output_token_num,

View File

@@ -63,7 +63,7 @@ def xpu_pre_process(
 ) -> XPUForwardMeta:
     """ """
     max_len = input_ids.shape[1]
-    cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
+    cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time, dtype="int32")
     token_num = paddle.sum(seq_lens_this_time)
     (

View File

@@ -197,7 +197,7 @@ def naive_attention_impl(
 def get_padding_offset(bsz, max_seq_len, seq_lens_this_time):
-    cum_offsets_now = paddle.cumsum(max_seq_len - seq_lens_this_time)
+    cum_offsets_now = paddle.cumsum(max_seq_len - seq_lens_this_time, dtype="int32")
     cum_offsets = paddle.zeros(shape=(bsz + 1), dtype="int32")
     cum_offsets[1:] = cum_offsets_now
     token_num = paddle.sum(seq_lens_this_time)

View File

@@ -197,7 +197,7 @@ def naive_attention_impl(
 def get_padding_offset(bsz, max_seq_len, seq_lens_this_time):
-    cum_offsets_now = paddle.cumsum(max_seq_len - seq_lens_this_time)
+    cum_offsets_now = paddle.cumsum(max_seq_len - seq_lens_this_time, dtype="int32")
     cum_offsets = paddle.zeros(shape=(bsz + 1), dtype="int32")
     cum_offsets[1:] = cum_offsets_now
     token_num = paddle.sum(seq_lens_this_time)