[Iluvatar GPU] Optimize attention and moe performance (#3234)

This commit is contained in:
yzwu
2025-08-08 10:51:24 +08:00
committed by GitHub
parent 37569cca86
commit fbdd6b0663
24 changed files with 1130 additions and 1653 deletions

View File

@@ -723,7 +723,12 @@ def run_worker_proc() -> None:
fd_config = initialize_fd_config(args, ranks, local_rank)
# Create worker process
worker_proc = PaddleDisWorkerProc(fd_config, ranks, local_rank)
if current_platform.is_iluvatar():
from fastdeploy.worker.iluvatar_worker import IluvatarPaddleDisWorkerProc
worker_proc = IluvatarPaddleDisWorkerProc(fd_config, ranks, local_rank)
else:
worker_proc = PaddleDisWorkerProc(fd_config, ranks, local_rank)
# Initialize device and create model runner
worker_proc.init_device()