[Executor] CUDA Graph support padding batch (#2844)

* cuda graph support padding batch

* Integrate the startup parameters for the graph optimization backend and provide support for user-defined capture sizes.

* Do not insert max_num_seqs when the user specifies a capture list

* Support setting the graph optimization config from a YAML file

* update cuda graph ci

* fix ci bug

* fix ci bug
This commit is contained in:
RAM
2025-07-16 10:49:01 +08:00
committed by GitHub
parent 61b3997b85
commit 0fad10b35a
30 changed files with 291 additions and 225 deletions

View File

@@ -68,16 +68,20 @@ class CudaGraphPiecewiseBackend:
self.concrete_size_entries[shape] = ConcreteSizeEntry(
runtime_bs=shape)
logger.debug("[CUDA GRAPH] Created all batch size entry ")
logger.info(
f"[CUDA GRAPH] CUDAGraph capture list {self.cudagraph_capture_sizes}, "
"Created all batch sizes entry."
)
def __call__(self, **kwargs):
# Get batch size
ids_remove_padding: paddle.Tensor = kwargs["ids_remove_padding"]
batch_size = ids_remove_padding.shape[0]
padding_batch_size = self.batch_size_to_captured_size[batch_size]
logger.debug((
f"[CUDA GRAPH] The actual batch size obtained by CUDAGraph is :{batch_size}, ",
f"The padded batch size is :{padding_batch_size}"))
logger.debug(
f"[CUDA GRAPH] The actual batch size obtained by CUDAGraph is :{batch_size}, "
f"The padded batch size is :{padding_batch_size}"
)
entry = self.concrete_size_entries.get(padding_batch_size)
assert entry is not None, f"Batch size:{padding_batch_size} is not in cuda graph capture list."
@@ -96,10 +100,10 @@ class CudaGraphPiecewiseBackend:
for n in range(entry.num_finished_warmup, self.warm_up_size):
entry.num_finished_warmup += 1
entry.runnable(**kwargs)
logger.debug((
"[CUDA GRAPH] Warm up for batch size ",
f"{padding_batch_size}, finished ({n+1}/{entry.num_finished_warmup}) times"
))
logger.debug(
f"[CUDA GRAPH] Warm up for batch size {padding_batch_size}, "
f"finished ({n+1}/{entry.num_finished_warmup}) times"
)
# Store input addresses for debug
input_addresses = [