[Iluvatar GPU] Optimize attention and MoE performance (#3234)

2025-10-04 08:16:42 +08:00 · 2025-08-08 10:51:24 +08:00
parent 37569cca86
commit fbdd6b0663
24 changed files with 1130 additions and 1653 deletions
--- a/fastdeploy/model_executor/pre_and_post_process.py
+++ b/fastdeploy/model_executor/pre_and_post_process.py
@@ -211,7 +211,7 @@ def post_process_normal(
        model_output.stop_flags,
    )

-    if current_platform.is_cuda():
+    if current_platform.is_cuda() or current_platform.is_iluvatar():
        set_stop_value_multi_ends(
            sampler_output.sampled_token_ids,
            model_output.stop_flags,