Clear dead code And supplementary notes (#2757)

* 1.supplementary notes 2.delete dead code * fix bug of forward meta * Global modification of forward meta * fix vl model_runner bug
2025-09-27 21:02:24 +08:00 · 2025-07-09 16:17:34 +08:00
parent b89180f1cd
commit 03a74995b8
12 changed files with 248 additions and 463 deletions
--- a/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py
+++ b/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py
@@ -46,13 +46,9 @@ class ConcreteSizeEntry:
    # Output buffer of cudagraph
    output_buffer: Optional[paddle.Tensor] = None

-    # for cudagraph debugging, track the input addresses
-    # during capture, and check if they are the same during replay
-    input_addresses: Optional[list[int]] = None
-

 class CudaGraphPiecewiseBackend:
-    """ """
+    """ Manage the capture and replay of CUDA graphs at the subgraph level. """

    def __init__(
        self,
@@ -65,33 +61,31 @@ class CudaGraphPiecewiseBackend:
        self.warm_up_size = fd_config.graph_opt_config.cudagraph_num_of_warmups
        self.batch_size_to_captured_size = fd_config.graph_opt_config.batch_size_to_captured_size

-        # runtime_bs -> ConcreteSizeEntry
+        # Runtime batch size -> ConcreteSizeEntry
        self.concrete_size_entries: Dict[int, ConcreteSizeEntry] = {}

        for shape in self.cudagraph_capture_sizes:
            self.concrete_size_entries[shape] = ConcreteSizeEntry(
                runtime_bs=shape)

-        print("[CUDA GRAPH] Created all batch size entry ")
+        logger.debug("[CUDA GRAPH] Created all batch size entry ")

    def __call__(self, **kwargs):
        # Get batch size
        ids_remove_padding: paddle.Tensor = kwargs["ids_remove_padding"]
        batch_size = ids_remove_padding.shape[0]
-
        padding_batch_size = self.batch_size_to_captured_size[batch_size]
-        # print(
-        #     f"[CUDA GRAPH] The actual batch size obtained by CUDAGraph is :{batch_size}, ",
-        #     f"The padded batch size is :{padding_batch_size}"
-        # )
+        logger.debug(
+            f"[CUDA GRAPH] The actual batch size obtained by CUDAGraph is :{batch_size}, ",
+            f"The padded batch size is :{padding_batch_size}")

        entry = self.concrete_size_entries.get(padding_batch_size)
        assert entry is not None, f"Batch size:{padding_batch_size} is not in cuda graph capture list."
        if entry.runnable is None:
            entry.runnable = self.runnable
-            # print(
-            #     f"[CUDA GRAPH] New entry lazy initialize with batch size {padding_batch_size}"
-            # )
+            logger.debug(
+                f"[CUDA GRAPH] New entry lazy initialize with batch size {padding_batch_size}"
+            )

        if not entry.use_cudagraph:
            return entry.runnable(**kwargs)
@@ -102,10 +96,10 @@ class CudaGraphPiecewiseBackend:
            for n in range(entry.num_finished_warmup, self.warm_up_size):
                entry.num_finished_warmup += 1
                entry.runnable(**kwargs)
-                # print(
-                #     "[CUDA GRAPH] Warm up for batch size ",
-                #     f"{padding_batch_size}, finished ({n+1}/{entry.num_finished_warmup}) times"
-                # )
+                logger.debug(
+                    "[CUDA GRAPH] Warm up for batch size ",
+                    f"{padding_batch_size}, finished ({n+1}/{entry.num_finished_warmup}) times"
+                )

            # Store input addresses for debug
            input_addresses = [
@@ -129,11 +123,13 @@ class CudaGraphPiecewiseBackend:
            output._clear

            paddle.device.synchronize()
-            # print(
-            #     f"[CUDA GRAPH] CUDAGraph captured for batch size {padding_batch_size}"
-            # )
+            logger.debug(
+                f"[CUDA GRAPH] CUDAGraph captured for batch size {padding_batch_size}"
+            )

        # Replay
        entry.cuda_graph.replay()
-        # print(f"[CUDA GRAPH] CUDAGraph replayed for batch size {padding_batch_size}")
+        logger.debug(
+            f"[CUDA GRAPH] CUDAGraph replayed for batch size {padding_batch_size}"
+        )
        return entry.output_buffer