[Iluvatar GPU] Optimize attention and MoE performance (#3234)

2025-10-05 08:37:06 +08:00 · 2025-08-08 10:51:24 +08:00
parent 37569cca86
commit fbdd6b0663
24 changed files with 1130 additions and 1653 deletions
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -723,7 +723,12 @@ def run_worker_proc() -> None:
    fd_config = initialize_fd_config(args, ranks, local_rank)

    # Create worker process
-    worker_proc = PaddleDisWorkerProc(fd_config, ranks, local_rank)
+    if current_platform.is_iluvatar():
+        from fastdeploy.worker.iluvatar_worker import IluvatarPaddleDisWorkerProc
+
+        worker_proc = IluvatarPaddleDisWorkerProc(fd_config, ranks, local_rank)
+    else:
+        worker_proc = PaddleDisWorkerProc(fd_config, ranks, local_rank)

    # Initialize device and create model runner
    worker_proc.init_device()