[Graph Optimization] Refactor default capture list (#4617)

* fix bug and refine code * add debug count * refine code
2025-12-24 13:28:13 +08:00 · 2025-10-28 21:31:02 +08:00
parent 0a0c74e717
commit fff5fb5e39
3 changed files with 36 additions and 15 deletions
--- a/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py
+++ b/fastdeploy/model_executor/graph_optimization/cudagraph_piecewise_backend.py
@@ -171,7 +171,7 @@ class CudaGraphPiecewiseBackend:
            for n in range(entry.num_finished_warmup, self.warm_up_size):
                entry.num_finished_warmup += 1
                entry.runnable(**kwargs)
-                logger.debug(
+                logger.info(
                    f"[CUDA GRAPH][ID:{id(self)}] Warm up for real shape {padding_real_shape}, "
                    f"finished ({n + 1}/{entry.num_finished_warmup}) times"
                )
@@ -207,7 +207,7 @@ class CudaGraphPiecewiseBackend:

            # For CUDAGraph debug
            # self._save_cudagrpah_dot_files(entry)
-            logger.debug(f"[CUDA GRAPH][ID:{id(self)}] CUDAGraph captured for real shape {padding_real_shape}")
+            logger.info(f"[CUDA GRAPH][ID:{id(self)}] CUDAGraph captured for real shape {padding_real_shape}")

        # Replay
        entry.cuda_graph.replay()
@@ -224,7 +224,7 @@ class CudaGraphPiecewiseBackend:
        for shape in self.cudagraph_capture_sizes:
            self.concrete_size_entries[shape] = ConcreteSizeEntry(real_shape=shape)

-        logger.debug(
+        logger.info(
            f"[CUDA GRAPH][ID:{id(self)}] CUDAGraph capture list {self.cudagraph_capture_sizes}, "
            "Created all real shape entry."
        )
@@ -254,3 +254,9 @@ class CudaGraphPiecewiseBackend:
                f"{log_dir}/GraphDotFiles/backend{id(self)}_shape{entry.real_shape}",
                1 << 0,
            )
+
+    def check_capture_successful(self):
+        """Raise ValueError for the first entry in concrete_size_entries whose `captured` flag is falsy; return None otherwise."""
+        for shape, entry in self.concrete_size_entries.items():
+            if not entry.captured:
+                raise ValueError(f"[CUDA GRAPH][ID:{id(self)}] Shape {shape} capture failed.")
--- a/fastdeploy/model_executor/graph_optimization/graph_optimization_backend.py
+++ b/fastdeploy/model_executor/graph_optimization/graph_optimization_backend.py
@@ -34,6 +34,10 @@ from fastdeploy.model_executor.graph_optimization.utils import in_profile_run_mo
 from fastdeploy.model_executor.graph_optimization.utils import (
    in_sot_warmup_mode as in_warmup_mode,
 )
+from fastdeploy.utils import get_logger
+
+logger = get_logger("cudagrpah_piecewise_backend", "cudagraph_piecewise_backend.log")
+

 P = ParamSpec("P")
 T = TypeVar("T")
@@ -105,6 +109,9 @@ class GraphOptBackend:
        self.dy_runnable = self.runnable
        self.fd_config = fd_config
        self.max_captre_size = fd_config.graph_opt_config.cudagraph_capture_sizes[0]
+        self._debug_count_cudagraph_replay = 0
+        self._debug_count_total_step = 0
+
        if self.fd_config.graph_opt_config.graph_opt_level > 0:
            # 1. Prepare cuda graph input buffers (contain output of subgraphs)

@@ -123,6 +130,7 @@ class GraphOptBackend:
        )

    def __call__(self, **kwargs):
+        self._debug_count_total_step += 1
        if not self.fd_config.graph_opt_config.use_cudagraph:
            return self.runnable(**kwargs)
        if self.cudagraph_piecewise_backend is None:
@@ -136,6 +144,10 @@ class GraphOptBackend:
        if (not kwargs["forward_meta"].step_use_cudagraph) or (real_shape > self.cudagraph_switch_threshold):
            return self.dy_runnable(**kwargs)
        else:
+            self._debug_count_cudagraph_replay += 1
+            logger.debug(
+                f"[CUDA GRAPH][ID:{id(self.cudagraph_piecewise_backend)}] Total step count: {self._debug_count_total_step}, CUDAGraph replay count: {self._debug_count_cudagraph_replay}"
+            )
            return self.cudagraph_piecewise_backend.__call__(**kwargs)

    def clear_cudagraph_piecewise_backend(self):