Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-04 08:16:42 +08:00.
add dtype int32 (#3692)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
This commit is contained in:
@@ -30,7 +30,7 @@ def test_speculate_get_output_padding_offset():
     seq_lens_output = paddle.to_tensor(seq_lens_output, dtype="int32")
     out_token_num = paddle.sum(seq_lens_output)
-    output_cum_offsets_tmp = paddle.cumsum(max_seq_len - seq_lens_output)
+    output_cum_offsets_tmp = paddle.cumsum(max_seq_len - seq_lens_output, dtype="int32")

     output_padding_offset_xpu, output_cum_offsets_xpu = speculate_get_output_padding_offset(
         output_cum_offsets_tmp, out_token_num, seq_lens_output, max_seq_len
@@ -397,7 +397,7 @@ class EngineSevice:
                 image_type_ids = paddle.to_tensor(inputs["image_type_ids"], dtype="int32")
                 image_mask = input_ids == self.data_processor.image_patch_id
                 image_token_sum = paddle.full(shape=[len(input_ids) + 1], fill_value=0, dtype="int32")
-                image_token_sum[1:] = paddle.cumsum(image_mask.cast("int32"))
+                image_token_sum[1:] = paddle.cumsum(image_mask.cast("int32"), dtype="int32")
                 grid_thw = []
                 for one in inputs["grid_thw"]:
                     if one[0] == 1:
@@ -257,7 +257,7 @@ def remove_padding(
        - The key sequence lengths (paddle.Tensor).
    """
    if current_platform.is_cuda():
-        cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
+        cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time, dtype="int32")
        token_num = paddle.sum(seq_lens_this_time)
        (
            ids_remove_padding,
@@ -301,7 +301,7 @@ def speculate_remove_padding(
        - Key sequence lengths (paddle.Tensor).
    """
    if current_platform.is_cuda():
-        cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
+        cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time, dtype="int32")
        token_num = paddle.sum(seq_lens_this_time)
        (
            ids_remove_padding,
@@ -106,7 +106,7 @@ def pre_process(
    """
    # Remove padding
    max_len = input_ids.shape[1]
-    cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
+    cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time, dtype="int32")
    token_num = paddle.sum(seq_lens_this_time)
    output_padding_offset = None
    output_cum_offsets = None
@@ -132,7 +132,7 @@ def pre_process(
        if isinstance(seq_lens_output, list):
            seq_lens_output = seq_lens_output[0]
        output_token_num = paddle.sum(seq_lens_output)
-        output_cum_offsets_tmp = paddle.cumsum(max_len - seq_lens_output)
+        output_cum_offsets_tmp = paddle.cumsum(max_len - seq_lens_output, dtype="int32")
        output_padding_offset, output_cum_offsets = speculate_get_output_padding_offset(
            output_cum_offsets_tmp,
            output_token_num,
@@ -63,7 +63,7 @@ def xpu_pre_process(
) -> XPUForwardMeta:
    """ """
    max_len = input_ids.shape[1]
-    cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time)
+    cum_offsets_now = paddle.cumsum(max_len - seq_lens_this_time, dtype="int32")
    token_num = paddle.sum(seq_lens_this_time)

    (
@@ -197,7 +197,7 @@ def naive_attention_impl(


 def get_padding_offset(bsz, max_seq_len, seq_lens_this_time):
-    cum_offsets_now = paddle.cumsum(max_seq_len - seq_lens_this_time)
+    cum_offsets_now = paddle.cumsum(max_seq_len - seq_lens_this_time, dtype="int32")
     cum_offsets = paddle.zeros(shape=(bsz + 1), dtype="int32")
     cum_offsets[1:] = cum_offsets_now
     token_num = paddle.sum(seq_lens_this_time)
@@ -197,7 +197,7 @@ def naive_attention_impl(


 def get_padding_offset(bsz, max_seq_len, seq_lens_this_time):
-    cum_offsets_now = paddle.cumsum(max_seq_len - seq_lens_this_time)
+    cum_offsets_now = paddle.cumsum(max_seq_len - seq_lens_this_time, dtype="int32")
     cum_offsets = paddle.zeros(shape=(bsz + 1), dtype="int32")
     cum_offsets[1:] = cum_offsets_now
     token_num = paddle.sum(seq_lens_this_time)
Reference in New Issue
Block a user