[CUDAGraph] Support multi output buffers and merge some fixes from feature/exp_0908 (#4062)

* refine cudagraph

* refine cudagraph

* typo

* fix

* fix plugins

* fix

* update

* update

* update
This commit is contained in:
Yuanle Liu
2025-09-15 16:21:30 +08:00
committed by GitHub
parent 9409665713
commit b1b33211e8
8 changed files with 70 additions and 45 deletions

View File

@@ -23,5 +23,5 @@ PLUGINS_GROUP = "fastdeploy.input_processor_plugins"
def load_input_processor_plugins():
"""load_input_processor_plugins"""
plugins = load_plugins_by_group(group=PLUGINS_GROUP)
assert len(plugins) <= 1, "Most one plugin is allowed to be loaded."
assert len(plugins) == 1, "Only one plugin is allowed to be loaded."
return next(iter(plugins.values()))()

View File

@@ -14,7 +14,7 @@
# limitations under the License.
"""
from fastdeploy.plugins.utils import load_plugins_by_group, plugins_loaded
from fastdeploy.plugins.utils import load_plugins_by_group
# used for model runner
PLUGINS_GROUP = "fastdeploy.model_runner_plugins"
@@ -22,11 +22,6 @@ PLUGINS_GROUP = "fastdeploy.model_runner_plugins"
def load_model_runner_plugins():
"""load_model_runner_plugins"""
global plugins_loaded
if plugins_loaded:
return
plugins_loaded = True
plugins = load_plugins_by_group(group=PLUGINS_GROUP)
assert len(plugins) <= 1, "Most one plugin is allowed to be loaded."
assert len(plugins) == 1, "Only one plugin is allowed to be loaded."
return next(iter(plugins.values()))()

View File

@@ -14,7 +14,7 @@
# limitations under the License.
"""
from fastdeploy.plugins.utils import load_plugins_by_group
from fastdeploy.plugins.utils import load_plugins_by_group, plugins_loaded
# make sure one process only loads plugins once
PLUGINS_GROUP = "fastdeploy.reasoning_parser_plugins"
@@ -22,6 +22,12 @@ PLUGINS_GROUP = "fastdeploy.reasoning_parser_plugins"
def load_reasoning_parser_plugins():
"""load_reasoning_parser_plugins"""
global plugins_loaded
if plugins_loaded:
return
plugins_loaded = True
plugins = load_plugins_by_group(group=PLUGINS_GROUP)
assert len(plugins) <= 1, "Most one plugin is allowed to be loaded."
return next(iter(plugins.values()))()
# general plugins, we only need to execute the loaded functions
for func in plugins.values():
func()