[SOT] Add sot warmup (NVIDIA GPU Only) (#2929)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

* add sot warmup

* fix code style

* change batch_size list

* add param to config

* rm free_list settings && set sot_warmup_sizes

* finish debug with dynamic dims by type annotations

* add profile_run guard

* rm sth useless
This commit is contained in:
Ryan
2025-07-22 21:36:14 +08:00
committed by GitHub
parent 7c5e34e72d
commit 95b5af24db
7 changed files with 71 additions and 18 deletions

View File

@@ -31,26 +31,15 @@ from fastdeploy.model_executor.graph_optimization.cudagraph_piecewise_backend im
from fastdeploy.model_executor.graph_optimization.dynamic_dims_marker import (
resolve_dynamic_dims,
)
from fastdeploy.model_executor.graph_optimization.utils import in_profile_run_mode
from fastdeploy.model_executor.graph_optimization.utils import (
in_sot_warmup_mode as in_warmup_mode,
)
P = ParamSpec("P")
T = TypeVar("T")
# TODO(SigureMo): Replace this fn with real implementation by DrRyanHuang
def create_in_warmup_mode():
cnt = 0
def in_warmup_mode():
nonlocal cnt
cnt += 1
return cnt < 32
return in_warmup_mode
in_warmup_mode = create_in_warmup_mode()
def apply_to_static_optimization(fn: Callable[P, T], backend: ToStaticBackend) -> Callable[P, T]:
forward_fn = fn
forward_sig = inspect.signature(forward_fn)
@@ -99,6 +88,8 @@ def apply_to_static_optimization(fn: Callable[P, T], backend: ToStaticBackend) -
@functools.wraps(forward_fn)
def static_forward(self, *args, **kwargs):
if in_profile_run_mode():
return forward_fn(self, *args, **kwargs)
nonlocal need_warmup
is_warmup = in_warmup_mode() and need_warmup
if is_warmup: