custom all reduce support cuda graph (#2938)

* Support enabling CUDA graph and custom all-reduce at the same time, and fix the custom all-reduce flag being overwritten during config initialization

* rename communication_op to communication
zhink authored on 2025-07-21 22:52:03 +08:00, committed by GitHub
parent ff4569f135
commit 0262ef7eb3
21 changed files with 88 additions and 51 deletions


@@ -201,6 +201,8 @@ class ParallelConfig:
         # disable any whitespace for guided decoding
         self.disable_any_whitespace: bool = True
         self.pod_ip: str = None
+        # enable the custom all-reduce kernel and fall back to NCCL(dist.all_reduce).
+        self.enable_custom_all_reduce: bool = False
         for key, value in args.items():
             if hasattr(self, key):
                 setattr(self, key, value)
@@ -213,8 +215,6 @@ class ParallelConfig:
             self.moe_phase = MoEPhase.DECODER
         else:
             raise NotImplementedError
-        # enable the custom all-reduce kernel and fall back to NCCL(dist.all_reduce).
-        self.enable_custom_all_reduce: bool = False
         # pd_disaggregation
         use_pd_disaggregation: int = int(os.getenv("FLAGS_use_pd_disaggregation", 0))
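
The move matters because `__init__` applies user-supplied `args` through the `hasattr`/`setattr` loop: when the default was assigned after that loop, `enable_custom_all_reduce` did not yet exist while the loop ran (so the user's value was skipped by the `hasattr` check) and was then forced to `False`. A minimal standalone sketch of the before/after behavior (hypothetical classes, not FastDeploy's actual `ParallelConfig`):

```python
# Hypothetical sketch of the ordering bug this commit fixes.

class BuggyConfig:
    def __init__(self, args: dict):
        for key, value in args.items():
            # hasattr is False here: the attribute is not defined yet,
            # so the user's value is silently dropped.
            if hasattr(self, key):
                setattr(self, key, value)
        # Default assigned AFTER the override loop.
        self.enable_custom_all_reduce: bool = False


class FixedConfig:
    def __init__(self, args: dict):
        # Default assigned BEFORE the loop, so user args can override it.
        self.enable_custom_all_reduce: bool = False
        for key, value in args.items():
            if hasattr(self, key):
                setattr(self, key, value)


args = {"enable_custom_all_reduce": True}
print(BuggyConfig(args).enable_custom_all_reduce)  # False -- user setting lost
print(FixedConfig(args).enable_custom_all_reduce)  # True  -- user setting kept
```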