mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-26 20:41:53 +08:00
@@ -32,7 +32,7 @@ class ConcreteSizeEntry:
|
||||
"""Record the concrete information corresponding to the current shape(num_tokens)"""
|
||||
|
||||
# Concrete shape
|
||||
runtime_bs: int
|
||||
real_shape: int
|
||||
# Whether the size is in cudagraph_capture_sizes
|
||||
use_cudagraph: bool = True
|
||||
# Has runtime-bs been captured before
|
||||
@@ -114,6 +114,9 @@ class CudaGraphPiecewiseBackend:
|
||||
output._clear
|
||||
|
||||
paddle.device.synchronize()
|
||||
|
||||
# For CUDAGraph debug
|
||||
# self._save_cudagrpah_dot_files(entry)
|
||||
logger.debug(f"[CUDA GRAPH] CUDAGraph captured for real shape {padding_real_shape}")
|
||||
|
||||
# Replay
|
||||
@@ -127,7 +130,7 @@ class CudaGraphPiecewiseBackend:
|
||||
self.concrete_size_entries: Dict[int, ConcreteSizeEntry] = {}
|
||||
|
||||
for shape in self.cudagraph_capture_sizes:
|
||||
self.concrete_size_entries[shape] = ConcreteSizeEntry(runtime_bs=shape)
|
||||
self.concrete_size_entries[shape] = ConcreteSizeEntry(real_shape=shape)
|
||||
|
||||
logger.info(
|
||||
f"[CUDA GRAPH] CUDAGraph capture list {self.cudagraph_capture_sizes}, " "Created all real shape entry."
|
||||
@@ -146,3 +149,11 @@ class CudaGraphPiecewiseBackend:
|
||||
|
||||
# Create new entries
|
||||
self._create_entry_dict()
|
||||
|
||||
def _save_cudagrpah_dot_files(self, entry):
    """Dump the CUDA graph held by ``entry`` to dot files (debug helper).

    Nothing happens when ``entry.cuda_graph`` is falsy. Otherwise the graph's
    ``print_to_dot_files`` is called with a path built from ``id(self)`` and
    ``entry.real_shape``.
    """
    graph = entry.cuda_graph
    if not graph:
        return

    # The path embeds this backend's identity and the entry's real shape so
    # that dumps from different backends/shapes do not collide.
    dot_path = f"./log/GraphDotFiles/backend{id(self)}_shape{entry.real_shape}"
    # NOTE(review): 1 << 0 is forwarded as the second (flags) argument;
    # presumably the "verbose" debug-dot flag -- confirm against the
    # print_to_dot_files documentation.
    dot_flags = 1 << 0
    graph.print_to_dot_files(dot_path, dot_flags)
|
||||
|
Reference in New Issue
Block a user