mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-12 20:11:20 +08:00
[SOT] Add sot warmup (NVIDIA GPU Only) (#2929)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* add SOT warmup * fix code style * change batch_size list * add param to config * remove free_list settings and set sot_warmup_sizes * finish debugging dynamic dims via type annotations * add profile_run guard * remove unused code
This commit is contained in:
@@ -31,26 +31,15 @@ from fastdeploy.model_executor.graph_optimization.cudagraph_piecewise_backend im
|
||||
from fastdeploy.model_executor.graph_optimization.dynamic_dims_marker import (
|
||||
resolve_dynamic_dims,
|
||||
)
|
||||
from fastdeploy.model_executor.graph_optimization.utils import in_profile_run_mode
|
||||
from fastdeploy.model_executor.graph_optimization.utils import (
|
||||
in_sot_warmup_mode as in_warmup_mode,
|
||||
)
|
||||
|
||||
P = ParamSpec("P")
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
# TODO(SigureMo): Replace this fn with real implementation by DrRyanHuang
|
||||
def create_in_warmup_mode():
    """Build a stub warmup-mode predicate with a fixed call budget.

    Returns:
        A zero-argument callable that reports ``True`` for its first 31
        invocations and ``False`` from the 32nd call onward. Each factory
        call produces an independent counter, so separate predicates do
        not share state.
    """
    calls_so_far = 0

    def in_warmup_mode():
        nonlocal calls_so_far
        calls_so_far = calls_so_far + 1
        # Still inside the fixed warmup budget?
        if calls_so_far < 32:
            return True
        return False

    return in_warmup_mode


# Shared module-level predicate used by the to-static optimization path.
in_warmup_mode = create_in_warmup_mode()
|
||||
|
||||
|
||||
def apply_to_static_optimization(fn: Callable[P, T], backend: ToStaticBackend) -> Callable[P, T]:
|
||||
forward_fn = fn
|
||||
forward_sig = inspect.signature(forward_fn)
|
||||
@@ -99,6 +88,8 @@ def apply_to_static_optimization(fn: Callable[P, T], backend: ToStaticBackend) -
|
||||
|
||||
@functools.wraps(forward_fn)
|
||||
def static_forward(self, *args, **kwargs):
|
||||
if in_profile_run_mode():
|
||||
return forward_fn(self, *args, **kwargs)
|
||||
nonlocal need_warmup
|
||||
is_warmup = in_warmup_mode() and need_warmup
|
||||
if is_warmup:
|
||||
|
40
fastdeploy/model_executor/graph_optimization/utils.py
Normal file
40
fastdeploy/model_executor/graph_optimization/utils.py
Normal file
@@ -0,0 +1,40 @@
|
||||
"""
|
||||
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
|
||||
import contextlib
|
||||
|
||||
|
||||
def create_guard(default_value):
    """Create a (context-manager factory, reader) pair around one flag.

    Args:
        default_value: Initial value of the tracked state.

    Returns:
        A 2-tuple ``(state_guard, get_state)``. ``state_guard(value)`` is a
        context manager that sets the state to ``value`` for the duration of
        the ``with`` block and restores the previous value afterwards — even
        when the block raises — so guards nest correctly. ``get_state()``
        returns the value currently in effect.
    """
    tracked = default_value

    def get_state():
        return tracked

    @contextlib.contextmanager
    def state_guard(current_state):
        nonlocal tracked
        # Save-and-swap in one step; the finally clause guarantees the
        # previous value is restored on both normal exit and exception.
        saved, tracked = tracked, current_state
        try:
            yield
        finally:
            tracked = saved

    return state_guard, get_state
|
||||
|
||||
|
||||
# Module-level guard/getter pairs shared across the graph-optimization code.
# `with sot_warmup_guard(True):` marks a SOT warmup region; callers query it
# via `in_sot_warmup_mode()`. Defaults to False outside any `with` block.
sot_warmup_guard, in_sot_warmup_mode = create_guard(False)
# Same pattern for profile runs: `profile_run_guard(True)` marks a profiling
# region and `in_profile_run_mode()` reads the flag.
profile_run_guard, in_profile_run_mode = create_guard(False)
|
Reference in New Issue
Block a user