mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[SOT] Add sot warmup (NVIDIA GPU Only) (#2929)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* add sot warmup
* fix code style
* change batch_size list
* add param to config
* rm free_list settings && set sot_warmup_sizes
* finish debug with dynamic dims by type annotations
* add profile_run guard
* rm sth useless
This commit is contained in:
@@ -31,26 +31,15 @@ from fastdeploy.model_executor.graph_optimization.cudagraph_piecewise_backend im
|
||||
from fastdeploy.model_executor.graph_optimization.dynamic_dims_marker import (
|
||||
resolve_dynamic_dims,
|
||||
)
|
||||
from fastdeploy.model_executor.graph_optimization.utils import in_profile_run_mode
|
||||
from fastdeploy.model_executor.graph_optimization.utils import (
|
||||
in_sot_warmup_mode as in_warmup_mode,
|
||||
)
|
||||
|
||||
P = ParamSpec("P")
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
# TODO(SigureMo): Replace this fn with real implementation by DrRyanHuang
|
||||
def create_in_warmup_mode(warmup_calls: int = 32) -> Callable[[], bool]:
    """Build a stateful, call-counting stand-in for warmup detection.

    The returned zero-argument callable keeps its own private counter, so
    every factory call yields an independent predicate.

    Args:
        warmup_calls: Threshold the internal call counter is compared
            against. Defaults to 32, the value previously hard-coded.

    Returns:
        A callable that returns ``True`` while its invocation count is
        strictly below ``warmup_calls`` and ``False`` from then on.
    """
    cnt = 0

    def in_warmup_mode() -> bool:
        nonlocal cnt
        # The counter is bumped *before* the comparison, so with the default
        # threshold the first 31 calls report warmup and the 32nd does not.
        cnt += 1
        return cnt < warmup_calls

    return in_warmup_mode
|
||||
|
||||
|
||||
in_warmup_mode = create_in_warmup_mode()
|
||||
|
||||
|
||||
def apply_to_static_optimization(fn: Callable[P, T], backend: ToStaticBackend) -> Callable[P, T]:
|
||||
forward_fn = fn
|
||||
forward_sig = inspect.signature(forward_fn)
|
||||
@@ -99,6 +88,8 @@ def apply_to_static_optimization(fn: Callable[P, T], backend: ToStaticBackend) -
|
||||
|
||||
@functools.wraps(forward_fn)
|
||||
def static_forward(self, *args, **kwargs):
|
||||
if in_profile_run_mode():
|
||||
return forward_fn(self, *args, **kwargs)
|
||||
nonlocal need_warmup
|
||||
is_warmup = in_warmup_mode() and need_warmup
|
||||
if is_warmup:
|
||||
|
Reference in New Issue
Block a user