native top_p_sampling (#2901)

This commit is contained in:
lifulll
2025-07-22 14:09:59 +08:00
committed by GitHub
parent 0eedbdaee0
commit 2c6a9e887e
14 changed files with 93 additions and 7 deletions

View File

@@ -479,6 +479,17 @@ def rebuild_padding(
if current_platform.is_cuda():
from fastdeploy.model_executor.ops.gpu import rebuild_padding
hidden_states = rebuild_padding(
tmp_out,
cum_offsets,
seq_len_this_time,
seq_lens_decoder,
seq_lens_encoder,
output_padding_offset,
max_input_length,
)
elif current_platform.is_dcu():
from fastdeploy.model_executor.ops.gpu import rebuild_padding
hidden_states = rebuild_padding(
tmp_out,
cum_offsets,