custom all reduce support cuda graph (#2938)

* Support enabling CUDA graph and custom all reduce at the same time, and fix the custom all reduce flag being overwritten

* rename communication_op to communication
Authored by zhink on 2025-07-21 22:52:03 +08:00
Committed by GitHub
parent ff4569f135
commit 0262ef7eb3
21 changed files with 88 additions and 51 deletions


@@ -22,6 +22,7 @@ from paddle.device.cuda import graphs
 from fastdeploy.config import FDConfig
 from fastdeploy.utils import get_logger
+from fastdeploy.distributed.communication import capture_custom_allreduce
 logger = get_logger("cudagrpah_piecewise_backend", "cudagraph_piecewise_backend.log")
@@ -109,9 +110,11 @@ class CudaGraphPiecewiseBackend:
         paddle.device.synchronize()
         # Capture
-        new_grpah.capture_begin()
-        output = entry.runnable(**kwargs)
-        new_grpah.capture_end()
+        with capture_custom_allreduce():
+            new_grpah.capture_begin()
+            output = entry.runnable(**kwargs)
+            new_grpah.capture_end()
         # Store output buffer
         entry.cuda_graph = new_grpah
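
For context, `capture_custom_allreduce` (imported from `fastdeploy.distributed.communication` in the first hunk) is used as a context manager that scopes the CUDA graph capture so the custom all-reduce path can be recorded into the graph. Below is a minimal sketch of what such a helper could look like; the module-level flag and its name are assumptions for illustration, not FastDeploy's actual implementation.

# Minimal sketch (assumption, for illustration only): a context manager that
# marks custom all-reduce kernels as being captured into a CUDA graph and
# restores the previous state on exit. FastDeploy's real helper may differ.
from contextlib import contextmanager

_CAPTURING_CUSTOM_ALLREDUCE = False  # hypothetical module-level flag

@contextmanager
def capture_custom_allreduce():
    global _CAPTURING_CUSTOM_ALLREDUCE
    previous = _CAPTURING_CUSTOM_ALLREDUCE
    _CAPTURING_CUSTOM_ALLREDUCE = True  # signal that graph capture is in progress
    try:
        yield
    finally:
        _CAPTURING_CUSTOM_ALLREDUCE = previous  # restore prior state even if capture fails

Used this way, wrapping `capture_begin()`/`capture_end()` in the context manager (as in the hunk above) keeps the custom all-reduce flag set only while the graph is actually being recorded, which matches the commit's goal of letting CUDA graph and custom all reduce coexist.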