mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
native top_p_sampling (#2901)
This commit is contained in:
@@ -479,6 +479,17 @@ def rebuild_padding(
|
||||
if current_platform.is_cuda():
|
||||
from fastdeploy.model_executor.ops.gpu import rebuild_padding
|
||||
|
||||
hidden_states = rebuild_padding(
|
||||
tmp_out,
|
||||
cum_offsets,
|
||||
seq_len_this_time,
|
||||
seq_lens_decoder,
|
||||
seq_lens_encoder,
|
||||
output_padding_offset,
|
||||
max_input_length,
|
||||
)
|
||||
elif current_platform.is_dcu():
|
||||
from fastdeploy.model_executor.ops.gpu import rebuild_padding
|
||||
hidden_states = rebuild_padding(
|
||||
tmp_out,
|
||||
cum_offsets,
|
||||
|
Reference in New Issue
Block a user