mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-27 21:02:24 +08:00
Clear dead code And supplementary notes (#2757)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* 1.supplementary notes 2.delete dead code * fix bug of forward meta * Global modification of forward meta * fix vl model_runner bug
This commit is contained in:
@@ -46,13 +46,9 @@ class ConcreteSizeEntry:
|
||||
# Output buffer of cudagraph
|
||||
output_buffer: Optional[paddle.Tensor] = None
|
||||
|
||||
# for cudagraph debugging, track the input addresses
|
||||
# during capture, and check if they are the same during replay
|
||||
input_addresses: Optional[list[int]] = None
|
||||
|
||||
|
||||
class CudaGraphPiecewiseBackend:
|
||||
""" """
|
||||
""" Manage the capture and replay of CUDA graphs at the subgraph level. """
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -65,33 +61,31 @@ class CudaGraphPiecewiseBackend:
|
||||
self.warm_up_size = fd_config.graph_opt_config.cudagraph_num_of_warmups
|
||||
self.batch_size_to_captured_size = fd_config.graph_opt_config.batch_size_to_captured_size
|
||||
|
||||
# runtime_bs -> ConcreteSizeEntry
|
||||
# Runtime batch size -> ConcreteSizeEntry
|
||||
self.concrete_size_entries: Dict[int, ConcreteSizeEntry] = {}
|
||||
|
||||
for shape in self.cudagraph_capture_sizes:
|
||||
self.concrete_size_entries[shape] = ConcreteSizeEntry(
|
||||
runtime_bs=shape)
|
||||
|
||||
print("[CUDA GRAPH] Created all batch size entry ")
|
||||
logger.debug("[CUDA GRAPH] Created all batch size entry ")
|
||||
|
||||
def __call__(self, **kwargs):
|
||||
# Get batch size
|
||||
ids_remove_padding: paddle.Tensor = kwargs["ids_remove_padding"]
|
||||
batch_size = ids_remove_padding.shape[0]
|
||||
|
||||
padding_batch_size = self.batch_size_to_captured_size[batch_size]
|
||||
# print(
|
||||
# f"[CUDA GRAPH] The actual batch size obtained by CUDAGraph is :{batch_size}, ",
|
||||
# f"The padded batch size is :{padding_batch_size}"
|
||||
# )
|
||||
logger.debug(
|
||||
f"[CUDA GRAPH] The actual batch size obtained by CUDAGraph is :{batch_size}, ",
|
||||
f"The padded batch size is :{padding_batch_size}")
|
||||
|
||||
entry = self.concrete_size_entries.get(padding_batch_size)
|
||||
assert entry is not None, f"Batch size:{padding_batch_size} is not in cuda graph capture list."
|
||||
if entry.runnable is None:
|
||||
entry.runnable = self.runnable
|
||||
# print(
|
||||
# f"[CUDA GRAPH] New entry lazy initialize with batch size {padding_batch_size}"
|
||||
# )
|
||||
logger.debug(
|
||||
f"[CUDA GRAPH] New entry lazy initialize with batch size {padding_batch_size}"
|
||||
)
|
||||
|
||||
if not entry.use_cudagraph:
|
||||
return entry.runnable(**kwargs)
|
||||
@@ -102,10 +96,10 @@ class CudaGraphPiecewiseBackend:
|
||||
for n in range(entry.num_finished_warmup, self.warm_up_size):
|
||||
entry.num_finished_warmup += 1
|
||||
entry.runnable(**kwargs)
|
||||
# print(
|
||||
# "[CUDA GRAPH] Warm up for batch size ",
|
||||
# f"{padding_batch_size}, finished ({n+1}/{entry.num_finished_warmup}) times"
|
||||
# )
|
||||
logger.debug(
|
||||
"[CUDA GRAPH] Warm up for batch size ",
|
||||
f"{padding_batch_size}, finished ({n+1}/{entry.num_finished_warmup}) times"
|
||||
)
|
||||
|
||||
# Store input addresses for debug
|
||||
input_addresses = [
|
||||
@@ -129,11 +123,13 @@ class CudaGraphPiecewiseBackend:
|
||||
output._clear
|
||||
|
||||
paddle.device.synchronize()
|
||||
# print(
|
||||
# f"[CUDA GRAPH] CUDAGraph captured for batch size {padding_batch_size}"
|
||||
# )
|
||||
logger.debug(
|
||||
f"[CUDA GRAPH] CUDAGraph captured for batch size {padding_batch_size}"
|
||||
)
|
||||
|
||||
# Replay
|
||||
entry.cuda_graph.replay()
|
||||
# print(f"[CUDA GRAPH] CUDAGraph replayed for batch size {padding_batch_size}")
|
||||
logger.debug(
|
||||
f"[CUDA GRAPH] CUDAGraph replayed for batch size {padding_batch_size}"
|
||||
)
|
||||
return entry.output_buffer
|
||||
|
Reference in New Issue
Block a user