[CUDAGraph] Support multi output buffers and merge some fixes from feature/exp_0908 (#4062)

* refine cudagraph

* refine cudagraph

* typo

* fix

* fix plugins

* fix

* update

* update

* update
This commit is contained in:
Yuanle Liu
2025-09-15 16:21:30 +08:00
committed by GitHub
parent 9409665713
commit b1b33211e8
8 changed files with 70 additions and 45 deletions

View File

@@ -14,6 +14,8 @@
# limitations under the License.
"""
from fastdeploy.plugins import load_reasoning_parser_plugins
from .abs_reasoning_parsers import ReasoningParser, ReasoningParserManager
from .ernie_vl_reasoning_parsers import ErnieVLReasoningParser
from .ernie_x1_reasoning_parsers import ErnieX1ReasoningParser
@@ -26,3 +28,5 @@ __all__ = [
"Qwen3ReasoningParser",
"ErnieX1ReasoningParser",
]
load_reasoning_parser_plugins()

View File

@@ -1,14 +1,6 @@
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
#
from collections.abc import Sequence
from typing import Tuple, Union
from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from fastdeploy.reasoning import ReasoningParser, ReasoningParserManager
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -20,6 +12,13 @@ from fastdeploy.reasoning import ReasoningParser, ReasoningParserManager
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
from collections.abc import Sequence
from typing import Tuple, Union
from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from fastdeploy.reasoning import ReasoningParser, ReasoningParserManager
@ReasoningParserManager.register_module("ernie_x1")