mirror of https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
Clear dead code and add supplementary notes (#2757)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* 1. Add supplementary notes; 2. delete dead code
* Fix bug of forward meta
* Global modification of forward meta
* Fix VL model_runner bug
@@ -46,13 +46,9 @@ class ConcreteSizeEntry:
     # Output buffer of cudagraph
     output_buffer: Optional[paddle.Tensor] = None

     # for cudagraph debugging, track the input addresses
     # during capture, and check if they are the same during replay
     input_addresses: Optional[list[int]] = None


 class CudaGraphPiecewiseBackend:
-    """ """
+    """ Manage the capture and replay of CUDA graphs at the subgraph level. """

     def __init__(
         self,
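For orientation, here is a hedged sketch of the per-batch-size entry as it can be inferred from the fields this commit reads and writes (runtime_bs, use_cudagraph, num_finished_warmup, runnable, cuda_graph, output_buffer, input_addresses). The name ConcreteSizeEntrySketch and all defaults are illustrative, not the repository's actual declarations:

```python
# Sketch only: reconstructed from the fields used in this diff; defaults are assumptions.
from dataclasses import dataclass
from typing import Any, Callable, Optional

import paddle


@dataclass
class ConcreteSizeEntrySketch:
    runtime_bs: int                       # the (padded) batch size this entry serves
    use_cudagraph: bool = True            # fall back to eager execution when False
    num_finished_warmup: int = 0          # warmup runs completed before capture
    runnable: Optional[Callable] = None   # lazily bound eager callable
    cuda_graph: Optional[Any] = None      # captured graph, None until first capture
    output_buffer: Optional[paddle.Tensor] = None  # output reused across replays
    input_addresses: Optional[list[int]] = None    # debug: input pointers at capture
```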
@@ -65,33 +61,31 @@ class CudaGraphPiecewiseBackend:
         self.warm_up_size = fd_config.graph_opt_config.cudagraph_num_of_warmups
         self.batch_size_to_captured_size = fd_config.graph_opt_config.batch_size_to_captured_size

-        # runtime_bs -> ConcreteSizeEntry
+        # Runtime batch size -> ConcreteSizeEntry
         self.concrete_size_entries: Dict[int, ConcreteSizeEntry] = {}

         for shape in self.cudagraph_capture_sizes:
             self.concrete_size_entries[shape] = ConcreteSizeEntry(
                 runtime_bs=shape)

-        print("[CUDA GRAPH] Created all batch size entry ")
+        logger.debug("[CUDA GRAPH] Created all batch size entry ")

     def __call__(self, **kwargs):
         # Get batch size
         ids_remove_padding: paddle.Tensor = kwargs["ids_remove_padding"]
         batch_size = ids_remove_padding.shape[0]

         padding_batch_size = self.batch_size_to_captured_size[batch_size]
-        # print(
-        #     f"[CUDA GRAPH] The actual batch size obtained by CUDAGraph is :{batch_size}, ",
-        #     f"The padded batch size is :{padding_batch_size}"
-        # )
+        logger.debug(
+            f"[CUDA GRAPH] The actual batch size obtained by CUDAGraph is :{batch_size}, ",
+            f"The padded batch size is :{padding_batch_size}")

         entry = self.concrete_size_entries.get(padding_batch_size)
         assert entry is not None, f"Batch size:{padding_batch_size} is not in cuda graph capture list."
         if entry.runnable is None:
             entry.runnable = self.runnable
-            # print(
-            #     f"[CUDA GRAPH] New entry lazy initialize with batch size {padding_batch_size}"
-            # )
+            logger.debug(
+                f"[CUDA GRAPH] New entry lazy initialize with batch size {padding_batch_size}"
+            )

         if not entry.use_cudagraph:
             return entry.runnable(**kwargs)
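The dispatch above pads the runtime batch size up to the nearest captured size via batch_size_to_captured_size, so any batch replays the smallest graph that can hold it. A minimal sketch of how such a table could be built; build_padding_table is a hypothetical helper for illustration, not FastDeploy code:

```python
# Map every runtime batch size to the smallest captured size that can hold it.
from typing import Dict, List


def build_padding_table(capture_sizes: List[int], max_bs: int) -> Dict[int, int]:
    sizes = sorted(capture_sizes)
    table: Dict[int, int] = {}
    for bs in range(1, max_bs + 1):
        # pick the smallest captured size >= bs
        table[bs] = next(s for s in sizes if s >= bs)
    return table


# e.g. with captures at 1, 2, 4, 8: a batch of 3 replays the size-4 graph
assert build_padding_table([1, 2, 4, 8], 8)[3] == 4
```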
@@ -102,10 +96,10 @@ class CudaGraphPiecewiseBackend:
             for n in range(entry.num_finished_warmup, self.warm_up_size):
                 entry.num_finished_warmup += 1
                 entry.runnable(**kwargs)
-                # print(
-                #     "[CUDA GRAPH] Warm up for batch size ",
-                #     f"{padding_batch_size}, finished ({n+1}/{entry.num_finished_warmup}) times"
-                # )
+                logger.debug(
+                    "[CUDA GRAPH] Warm up for batch size ",
+                    f"{padding_batch_size}, finished ({n+1}/{entry.num_finished_warmup}) times"
+                )

             # Store input addresses for debug
             input_addresses = [
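The hunk ends at the "Store input addresses for debug" step. A hedged sketch of what that check amounts to: a captured CUDA graph replays fixed device pointers, so if replay-time inputs live at different addresses the graph silently reads stale memory. check_input_addresses is illustrative; data_ptr() is assumed to return the tensor's device address as in Paddle's dygraph API:

```python
# Record input pointers at capture time, then verify replay sees the same buffers.
def check_input_addresses(entry, inputs) -> None:
    addresses = [t.data_ptr() for t in inputs]
    if entry.input_addresses is None:
        entry.input_addresses = addresses  # remembered at capture time
    else:
        assert addresses == entry.input_addresses, (
            "CUDA graph replay received different input buffers than capture")
```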
@@ -129,11 +123,13 @@ class CudaGraphPiecewiseBackend:
                 output._clear

             paddle.device.synchronize()
-            # print(
-            #     f"[CUDA GRAPH] CUDAGraph captured for batch size {padding_batch_size}"
-            # )
+            logger.debug(
+                f"[CUDA GRAPH] CUDAGraph captured for batch size {padding_batch_size}"
+            )

         # Replay
         entry.cuda_graph.replay()
-        # print(f"[CUDA GRAPH] CUDAGraph replayed for batch size {padding_batch_size}")
+        logger.debug(
+            f"[CUDA GRAPH] CUDAGraph replayed for batch size {padding_batch_size}"
+        )
         return entry.output_buffer
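Capture and replay above follow the usual CUDA graph lifecycle. A minimal standalone sketch, assuming paddle.device.cuda.graphs.CUDAGraph with capture_begin()/capture_end()/replay() is available in your Paddle build; buffers must stay at fixed addresses, which is why the backend keeps a persistent output_buffer per entry:

```python
import paddle
from paddle.device.cuda.graphs import CUDAGraph

x = paddle.ones([8, 128])            # static input buffer, reused by every replay
graph = CUDAGraph()

graph.capture_begin()
y = x * 2.0 + 1.0                    # kernels are recorded, not executed, here
graph.capture_end()

paddle.assign(paddle.full([8, 128], 3.0), output=x)  # write new data in place
graph.replay()                       # re-launches the recorded kernels on x
paddle.device.synchronize()          # y now holds 3.0 * 2.0 + 1.0
```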
@@ -28,7 +28,7 @@ _T = TypeVar("_T", bound=type[paddle.nn.Layer])


 def support_graph_optimization(cls: Optional[_T] = None) -> _T:
     """
-    A decorator for wrapping models or layers with CUDA graph support.
+    A decorator for wrapping models or layers with static graph and CUDAGraph support.
     This enables efficient kernel launch sequencing for improved GPU performance.

     Example usage:
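A simplified sketch of the decorator pattern this docstring describes: patch a Layer subclass so its forward() is routed through a backend chosen at init time. support_graph_optimization_sketch and the stand-in backend are illustrative, not the real implementation:

```python
from typing import Optional, TypeVar

import paddle

_T = TypeVar("_T", bound=type[paddle.nn.Layer])


class _EagerBackendSketch:
    """Hypothetical stand-in backend: calls the eager forward unchanged."""

    def __init__(self, runnable):
        self.runnable = runnable

    def __call__(self, **kwargs):
        return self.runnable(**kwargs)


def support_graph_optimization_sketch(cls: Optional[_T] = None) -> _T:
    def decorate(layer_cls):
        orig_init = layer_cls.__init__
        orig_forward = layer_cls.forward

        def __init__(self, *args, **kwargs):
            orig_init(self, *args, **kwargs)
            # bind the *original* forward so the wrapper below cannot recurse
            self._graph_opt_backend = _EagerBackendSketch(
                lambda **kw: orig_forward(self, **kw))

        def forward(self, **kwargs):
            return self._graph_opt_backend(**kwargs)

        layer_cls.__init__ = __init__
        layer_cls.forward = forward
        return layer_cls

    # support both @decorator and @decorator() usage, as the signature suggests
    return decorate if cls is None else decorate(cls)
```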
@@ -74,7 +74,7 @@ def support_graph_optimization(cls: Optional[_T] = None) -> _T:


 class GraphOptWrapper:
-    """ """
+    """ The wrapper for GraphOptBackend """

     def __init__(
         self,
@@ -87,7 +87,7 @@ class GraphOptWrapper:

     @abstractmethod
     def forward(self, **kwargs):
-        """ """
+        """ Abstract methods for implementing model.forward() """
         pass

     def __call__(self, **kwargs):
@@ -24,7 +24,10 @@ from fastdeploy.model_executor.graph_optimization.cudagraph_piecewise_backend im


 class GraphOptBackend:
-    """ """
+    """
+    Integrated various graph optimization functions, including dynamic graph to static graph conversion,
+    CINN compilation optimization, CudaGraph, and so on.
+    """

     fd_config: FDConfig
     cudagraph_piecewise_backend: Optional[CudaGraphPiecewiseBackend] = None
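A hedged sketch of how such an integrated backend might dispatch between the piecewise CUDA graph path and an eager/static fallback; use_cudagraph and the constructor signatures are assumed names for illustration, with CudaGraphPiecewiseBackend taken from the import in the hunk above:

```python
class GraphOptBackendSketch:
    def __init__(self, runnable, fd_config):
        self.runnable = runnable
        self.cudagraph_piecewise_backend = None
        if fd_config.graph_opt_config.use_cudagraph:   # assumed flag name
            self.cudagraph_piecewise_backend = CudaGraphPiecewiseBackend(
                fd_config=fd_config, runnable=runnable)

    def __call__(self, **kwargs):
        if self.cudagraph_piecewise_backend is not None:
            # piecewise CUDA graph path: pad, warm up, capture, replay
            return self.cudagraph_piecewise_backend(**kwargs)
        return self.runnable(**kwargs)   # eager / static-graph fallback
```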