mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[Iluvatar GPU] Optimize attention and moe performance (#3234)
This commit is contained in:
@@ -723,7 +723,12 @@ def run_worker_proc() -> None:
     fd_config = initialize_fd_config(args, ranks, local_rank)
 
     # Create worker process
-    worker_proc = PaddleDisWorkerProc(fd_config, ranks, local_rank)
+    if current_platform.is_iluvatar():
+        from fastdeploy.worker.iluvatar_worker import IluvatarPaddleDisWorkerProc
+
+        worker_proc = IluvatarPaddleDisWorkerProc(fd_config, ranks, local_rank)
+    else:
+        worker_proc = PaddleDisWorkerProc(fd_config, ranks, local_rank)
 
     # Initialize device and create model runner
     worker_proc.init_device()
Reference in New Issue
Block a user