diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py
index ac1b92c9b..648617423 100644
--- a/fastdeploy/engine/common_engine.py
+++ b/fastdeploy/engine/common_engine.py
@@ -42,10 +42,22 @@ from fastdeploy.inter_communicator import (
 from fastdeploy.metrics.metrics import main_process_metrics
 from fastdeploy.metrics.trace_util import start_span, start_span_request
 from fastdeploy.model_executor.guided_decoding import schema_checker
-from fastdeploy.output.token_processor import TokenProcessor
+from fastdeploy.plugins.token_processor import load_token_processor_plugins
 from fastdeploy.splitwise.splitwise_connector import SplitwiseConnector
 from fastdeploy.utils import EngineError, envs, llm_logger
 
+# Use the TokenProcessor supplied by a plugin when one can be loaded;
+# otherwise fall back to the built-in implementation.
+try:
+    TokenProcessor = load_token_processor_plugins()
+except Exception as e:
+    # Only Exception subclasses are handled, so KeyboardInterrupt and
+    # SystemExit still propagate instead of being swallowed.
+    llm_logger.info(f"TokenProcessor plugin not loaded ({e!r}), using the built-in TokenProcessor")
+    from fastdeploy.output.token_processor import TokenProcessor
+else:
+    llm_logger.info(f"TokenProcessor plugin {TokenProcessor} loaded")
+
 
 class EngineService:
     """
diff --git a/fastdeploy/plugins/__init__.py b/fastdeploy/plugins/__init__.py
index 5972f1b4a..08c292296 100644
--- a/fastdeploy/plugins/__init__.py
+++ b/fastdeploy/plugins/__init__.py
@@ -18,10 +18,12 @@ from .input_processor import load_input_processor_plugins
 from .model_register import load_model_register_plugins
 from .model_runner import load_model_runner_plugins
 from .reasoning_parser import load_reasoning_parser_plugins
+from .token_processor import load_token_processor_plugins
 
 __all__ = [
     "load_model_register_plugins",
     "load_model_runner_plugins",
     "load_input_processor_plugins",
     "load_reasoning_parser_plugins",
+    "load_token_processor_plugins",
 ]
diff --git a/fastdeploy/plugins/token_processor/__init__.py b/fastdeploy/plugins/token_processor/__init__.py
new file mode 100644
index 000000000..2b5c76b7a
--- /dev/null
+++ b/fastdeploy/plugins/token_processor/__init__.py
@@ -0,0 +1,40 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from fastdeploy.plugins.utils import load_plugins_by_group
+
+# Group name passed to load_plugins_by_group to select token processor plugins.
+PLUGINS_GROUP = "fastdeploy.token_processor_plugins"
+
+
+def load_token_processor_plugins():
+    """
+    Load the token processor plugin registered under PLUGINS_GROUP.
+
+    Returns:
+        The object returned by calling the single registered plugin.
+
+    Raises:
+        RuntimeError: If no plugin, or more than one plugin, is registered.
+    """
+    plugins = load_plugins_by_group(group=PLUGINS_GROUP)
+    # Raise explicitly instead of asserting: asserts are stripped under -O.
+    if len(plugins) > 1:
+        raise RuntimeError(f"At most one token processor plugin is allowed, found {len(plugins)}.")
+    if not plugins:
+        raise RuntimeError(f"No plugin registered in group {PLUGINS_GROUP}.")
+    # The registered value is called and its result is handed to the caller.
+    return next(iter(plugins.values()))()