From fa85956c6f8be88d8606043133bb7d9654700e79 Mon Sep 17 00:00:00 2001
From: gongshaotian <gstian5555@outlook.com>
Date: Mon, 27 Oct 2025 14:56:34 +0800
Subject: [PATCH] Add switch to enable/disable CUDA Graph for the draft model

---
 fastdeploy/config.py          | 3 +++
 fastdeploy/spec_decode/mtp.py | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/config.py b/fastdeploy/config.py
index 00aa5bdb8..3d950119f 100644
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -591,6 +591,9 @@ class GraphOptimizationConfig:
         """ Whether to use shared memory pool for multi capture_size """
         self.use_unique_memory_pool: bool = False
 
+        """ Whether to use CUDA Graph for the draft model """
+        self.draft_model_use_cudagraph: bool = True
+
         self.max_capture_size: int = None
         self.real_shape_to_captured_size: dict[int, int] = None
         # CINN Config ...
diff --git a/fastdeploy/spec_decode/mtp.py b/fastdeploy/spec_decode/mtp.py
index 79a084625..d4d3fc9f5 100644
--- a/fastdeploy/spec_decode/mtp.py
+++ b/fastdeploy/spec_decode/mtp.py
@@ -83,6 +83,7 @@ class MTPProposer(Proposer):
         self._init_model_inputs()
 
         # CUDA Graph
+        self.draft_model_use_cudagraph = self.graph_opt_config.draft_model_use_cudagraph
         self.cudagraph_capture_sizes = list(reversed(self.graph_opt_config.cudagraph_capture_sizes))
         self.sot_warmup_sizes = self.graph_opt_config.sot_warmup_sizes
 
@@ -618,7 +619,7 @@ class MTPProposer(Proposer):
             attn_backend.init_attention_metadata(self.forward_meta)
 
         # TODO(gongshaotian): Use CUDAGraph with Draft Model
-        self.forward_meta.step_use_cudagraph = step_use_cudagraph
+        self.forward_meta.step_use_cudagraph = step_use_cudagraph and self.draft_model_use_cudagraph
 
     def exist_prefill(self):
         """