custom all reduce support cuda graph (#2938)

* Support enabling CUDA graph and custom all reduce at the same time, and fix the custom all reduce flag being overwritten

* rename communication_op to communication
Authored by zhink on 2025-07-21 22:52:03 +08:00
Committed by GitHub
parent ff4569f135
commit 0262ef7eb3
21 changed files with 88 additions and 51 deletions


@@ -22,6 +22,7 @@ from paddle.device.cuda import graphs
 from fastdeploy.config import FDConfig
 from fastdeploy.utils import get_logger
+from fastdeploy.distributed.communication import capture_custom_allreduce
 logger = get_logger("cudagrpah_piecewise_backend", "cudagraph_piecewise_backend.log")
@@ -109,9 +110,11 @@ class CudaGraphPiecewiseBackend:
         paddle.device.synchronize()
         # Capture
-        new_grpah.capture_begin()
-        output = entry.runnable(**kwargs)
-        new_grpah.capture_end()
+        with capture_custom_allreduce():
+            new_grpah.capture_begin()
+            output = entry.runnable(**kwargs)
+            new_grpah.capture_end()
         # Store output buffer
         entry.cuda_graph = new_grpah
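
For context, `capture_custom_allreduce` (imported from `fastdeploy.distributed.communication` in the first hunk) is used as a context manager that scopes the CUDA graph capture so the custom all-reduce path can be recorded into the graph. Below is a minimal sketch of what such a helper could look like; the module-level flag and its name are assumptions for illustration, not FastDeploy's actual implementation.

# Minimal sketch (assumption, for illustration only): a context manager that
# marks custom all-reduce kernels as being captured into a CUDA graph and
# restores the previous state on exit. FastDeploy's real helper may differ.
from contextlib import contextmanager

_CAPTURING_CUSTOM_ALLREDUCE = False  # hypothetical module-level flag

@contextmanager
def capture_custom_allreduce():
    global _CAPTURING_CUSTOM_ALLREDUCE
    previous = _CAPTURING_CUSTOM_ALLREDUCE
    _CAPTURING_CUSTOM_ALLREDUCE = True  # signal that graph capture is in progress
    try:
        yield
    finally:
        _CAPTURING_CUSTOM_ALLREDUCE = previous  # restore prior state even if capture fails

Used this way, wrapping `capture_begin()`/`capture_end()` in the context manager (as in the hunk above) keeps the custom all-reduce flag set only while the graph is actually being recorded, which matches the commit's goal of letting CUDA graph and custom all reduce coexist.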