[Metax] adapt cutlass moe for ernie-vl (#4685)

This commit is contained in:
Neil Zhu
2025-11-03 17:44:27 +08:00
committed by GitHub
parent 69c2f3cda1
commit c95d0740ec
6 changed files with 174 additions and 101 deletions

View File

@@ -50,8 +50,12 @@ elif current_platform.is_dcu():
elif current_platform.is_maca():
from fastdeploy.model_executor.ops.gpu import (
get_padding_offset,
limit_thinking_content_length_v1,
limit_thinking_content_length_v2,
save_output,
set_stop_value_multi_ends,
speculate_limit_thinking_content_length_v1,
speculate_limit_thinking_content_length_v2,
step_paddle,
update_inputs,
update_inputs_v1,
@@ -810,7 +814,9 @@ def rebuild_padding(
seq_lens_decoder,
seq_lens_encoder,
output_padding_offset,
first_token_out,
max_input_length,
enable_logprob,
)
else:
raise RuntimeError("Not supported platform")