[Graph Optimization] Add the CUDAGraph usage switch for Draft Model (#4601)

* add a switch controlling whether the draft model uses CUDAGraph

* set the default to false

* capture the draft model in CI

* fix bug
This commit is contained in:
RAM
2025-10-30 11:44:50 +08:00
committed by GitHub
parent cfdd1600a5
commit cd3b7cc392
4 changed files with 29 additions and 22 deletions

View File

@@ -823,6 +823,8 @@ class GraphOptimizationConfig:
self.real_shape_to_captured_size: dict[int, int] = None
""" Whether to use shared memory pool for multi capture_size """
self.use_unique_memory_pool: bool = True
""" Whether to use cudagraph for draft model."""
self.draft_model_use_cudagraph: bool = False
# CINN Config ...
if args is not None: