custom all reduce support cuda graph (#2938)

* Support enabling CUDA graph and custom all-reduce at the same time, and fix the custom all-reduce flag being overwritten during config initialization

* rename communication_op to communication
zhink authored on 2025-07-21 22:52:03 +08:00, committed by GitHub
parent ff4569f135
commit 0262ef7eb3
21 changed files with 88 additions and 51 deletions


@@ -201,6 +201,8 @@ class ParallelConfig:
         # disable any whitespace for guided decoding
         self.disable_any_whitespace: bool = True
         self.pod_ip: str = None
+        # enable the custom all-reduce kernel and fall back to NCCL(dist.all_reduce).
+        self.enable_custom_all_reduce: bool = False
         for key, value in args.items():
             if hasattr(self, key):
                 setattr(self, key, value)
@@ -213,8 +215,6 @@ class ParallelConfig:
             self.moe_phase = MoEPhase.DECODER
         else:
             raise NotImplementedError
-        # enable the custom all-reduce kernel and fall back to NCCL(dist.all_reduce).
-        self.enable_custom_all_reduce: bool = False
         # pd_disaggregation
         use_pd_disaggregation: int = int(os.getenv("FLAGS_use_pd_disaggregation", 0))
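
The move matters because `__init__` applies user-supplied `args` through the `hasattr`/`setattr` loop: when the default was assigned after that loop, `enable_custom_all_reduce` did not yet exist while the loop ran (so the user's value was skipped by the `hasattr` check) and was then forced to `False`. A minimal standalone sketch of the before/after behavior (hypothetical classes, not FastDeploy's actual `ParallelConfig`):

```python
# Hypothetical sketch of the ordering bug this commit fixes.

class BuggyConfig:
    def __init__(self, args: dict):
        for key, value in args.items():
            # hasattr is False here: the attribute is not defined yet,
            # so the user's value is silently dropped.
            if hasattr(self, key):
                setattr(self, key, value)
        # Default assigned AFTER the override loop.
        self.enable_custom_all_reduce: bool = False


class FixedConfig:
    def __init__(self, args: dict):
        # Default assigned BEFORE the loop, so user args can override it.
        self.enable_custom_all_reduce: bool = False
        for key, value in args.items():
            if hasattr(self, key):
                setattr(self, key, value)


args = {"enable_custom_all_reduce": True}
print(BuggyConfig(args).enable_custom_all_reduce)  # False -- user setting lost
print(FixedConfig(args).enable_custom_all_reduce)  # True  -- user setting kept
```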