[Iluvatar GPU] Optimize attention and moe performance (#3234)

This commit is contained in:
yzwu
2025-08-08 10:51:24 +08:00
committed by GitHub
parent 37569cca86
commit fbdd6b0663
24 changed files with 1130 additions and 1653 deletions

View File

@@ -211,7 +211,7 @@ def post_process_normal(
model_output.stop_flags,
)
if current_platform.is_cuda():
if current_platform.is_cuda() or current_platform.is_iluvatar():
set_stop_value_multi_ends(
sampler_output.sampled_token_ids,
model_output.stop_flags,