mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 00:06:38 +08:00
custom all reduce support cuda graph (#2938)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* Support enabling CUDA graph and custom all-reduce at the same time, and fix the custom all-reduce flag being overwritten
* Rename communication_op to communication
This commit is contained in:
@@ -22,6 +22,7 @@ from paddle.device.cuda import graphs
|
||||
|
||||
from fastdeploy.config import FDConfig
|
||||
from fastdeploy.utils import get_logger
|
||||
from fastdeploy.distributed.communication import capture_custom_allreduce
|
||||
|
||||
logger = get_logger("cudagrpah_piecewise_backend", "cudagraph_piecewise_backend.log")
|
||||
|
||||
@@ -109,9 +110,11 @@ class CudaGraphPiecewiseBackend:
|
||||
paddle.device.synchronize()
|
||||
|
||||
# Capture
|
||||
new_grpah.capture_begin()
|
||||
output = entry.runnable(**kwargs)
|
||||
new_grpah.capture_end()
|
||||
with capture_custom_allreduce():
|
||||
new_grpah.capture_begin()
|
||||
output = entry.runnable(**kwargs)
|
||||
new_grpah.capture_end()
|
||||
|
||||
|
||||
# Store output buffer
|
||||
entry.cuda_graph = new_grpah
|
||||
|
Reference in New Issue
Block a user