[BugFix] fix TaskQueue dp_id in multi node (#3919)

This commit is contained in:
lizhenyun01
2025-09-05 22:29:26 +08:00
committed by GitHub
parent 8915c8411d
commit 2d975e16b0

View File

@@ -225,7 +225,8 @@ class EngineSevice:
             client_id=0,
             local_data_parallel_size=self.cfg.parallel_config.data_parallel_size,
             local_data_parallel_id=min(
-                self.cfg.worker_num_per_node * self.cfg.node_rank + self.cfg.parallel_config.local_data_parallel_id,
+                self.cfg.worker_num_per_node // self.cfg.parallel_config.tensor_parallel_size * self.cfg.node_rank
+                + self.cfg.parallel_config.local_data_parallel_id,
                 self.cfg.parallel_config.data_parallel_size - 1,
             ),
         )