mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-12 20:11:20 +08:00
[SOT] Add sot warmup (NVIDIA GPU Only) (#2929)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* add SOT warmup * fix code style * change batch_size list * add param to config * remove free_list settings and set sot_warmup_sizes * finish debugging dynamic dims via type annotations * add profile_run guard * remove unused code
This commit is contained in:
@@ -31,26 +31,15 @@ from fastdeploy.model_executor.graph_optimization.cudagraph_piecewise_backend im
|
||||
from fastdeploy.model_executor.graph_optimization.dynamic_dims_marker import (
|
||||
resolve_dynamic_dims,
|
||||
)
|
||||
from fastdeploy.model_executor.graph_optimization.utils import in_profile_run_mode
|
||||
from fastdeploy.model_executor.graph_optimization.utils import (
|
||||
in_sot_warmup_mode as in_warmup_mode,
|
||||
)
|
||||
|
||||
P = ParamSpec("P")
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
# TODO(SigureMo): Replace this fn with real implementation by DrRyanHuang
|
||||
def create_in_warmup_mode():
    """Build a stub warmup-mode predicate with a fixed call budget.

    Returns:
        A zero-argument callable that reports ``True`` for its first 31
        invocations and ``False`` from the 32nd call onward. Each factory
        call produces an independent counter, so separate predicates do
        not share state.
    """
    calls_so_far = 0

    def in_warmup_mode():
        nonlocal calls_so_far
        calls_so_far = calls_so_far + 1
        # Still inside the fixed warmup budget?
        if calls_so_far < 32:
            return True
        return False

    return in_warmup_mode


# Shared module-level predicate used by the to-static optimization path.
in_warmup_mode = create_in_warmup_mode()
|
||||
|
||||
|
||||
def apply_to_static_optimization(fn: Callable[P, T], backend: ToStaticBackend) -> Callable[P, T]:
|
||||
forward_fn = fn
|
||||
forward_sig = inspect.signature(forward_fn)
|
||||
@@ -99,6 +88,8 @@ def apply_to_static_optimization(fn: Callable[P, T], backend: ToStaticBackend) -
|
||||
|
||||
@functools.wraps(forward_fn)
|
||||
def static_forward(self, *args, **kwargs):
|
||||
if in_profile_run_mode():
|
||||
return forward_fn(self, *args, **kwargs)
|
||||
nonlocal need_warmup
|
||||
is_warmup = in_warmup_mode() and need_warmup
|
||||
if is_warmup:
|
||||
|
40
fastdeploy/model_executor/graph_optimization/utils.py
Normal file
40
fastdeploy/model_executor/graph_optimization/utils.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
import contextlib
|
||||
|
||||
|
||||
def create_guard(default_value):
    """Create a (context-manager factory, reader) pair around one flag.

    Args:
        default_value: Initial value of the tracked state.

    Returns:
        A 2-tuple ``(state_guard, get_state)``. ``state_guard(value)`` is a
        context manager that sets the state to ``value`` for the duration of
        the ``with`` block and restores the previous value afterwards — even
        when the block raises — so guards nest correctly. ``get_state()``
        returns the value currently in effect.
    """
    tracked = default_value

    def get_state():
        return tracked

    @contextlib.contextmanager
    def state_guard(current_state):
        nonlocal tracked
        # Save-and-swap in one step; the finally clause guarantees the
        # previous value is restored on both normal exit and exception.
        saved, tracked = tracked, current_state
        try:
            yield
        finally:
            tracked = saved

    return state_guard, get_state
|
||||
|
||||
|
||||
# Module-level guard/getter pairs shared across the graph-optimization code.
# `with sot_warmup_guard(True):` marks a SOT warmup region; callers query it
# via `in_sot_warmup_mode()`. Defaults to False outside any `with` block.
sot_warmup_guard, in_sot_warmup_mode = create_guard(False)
# Same pattern for profile runs: `profile_run_guard(True)` marks a profiling
# region and `in_profile_run_mode()` reads the flag.
profile_run_guard, in_profile_run_mode = create_guard(False)
|
Reference in New Issue
Block a user