[Feature] Support model weight update in EP (expert parallel) mode (#3802)

* Update config.py

* Update ep.py

* Update fused_moe_backend_base.py

* Update dynamic_weight_manager.py

* Update worker_process.py

* Fix CI
This commit is contained in:
ltd0924
2025-09-02 20:52:47 +08:00
committed by GitHub
parent d1d063e4af
commit 0f42771a84
5 changed files with 43 additions and 19 deletions

View File

@@ -350,8 +350,8 @@ class ParallelConfig:
)
)
# same ep group id
# (TODO:gaoziyuan move this gid config to ep.py)
dist.collective._set_custom_gid(self.data_parallel_size + tp_gid_offset)
self.ep_group = dist.new_group(range(self.expert_parallel_size))
logger.info(
f"data_parallel_size: {self.data_parallel_size}, tensor_parallel_size: {self.tensor_parallel_size}, expert_parallel_size: {self.expert_parallel_size}, data_parallel_rank: {self.data_parallel_rank}, tensor_parallel_rank: {self.tensor_parallel_rank}, expert_parallel_rank: {self.expert_parallel_rank}, tp_group: {self.tp_group}."
)