[BugFix] Fix the abnormal memory usage caused by shape errors in the triton moe backend (#4026)

* fix device_id to int

* fix triton_moe bug
This commit is contained in:
Yuanle Liu
2025-09-10 11:05:54 +08:00
committed by GitHub
parent dbab579299
commit c3b2a60fb8
4 changed files with 12 additions and 10 deletions

View File

@@ -84,7 +84,7 @@ class GpuWorker(WorkerBase):
self.model_runner: ModelRunnerBase = ModelRunner(
fd_config=self.fd_config,
device=self.device,
device_id=self.device_ids[self.local_rank % self.max_chips_per_node],
device_id=int(self.device_ids[self.local_rank % self.max_chips_per_node]),
rank=self.rank,
local_rank=self.local_rank,
)