[plugin] Custom model_runner/model support (#3186)

* support custom model&&model_runner

* fix merge

* add test && update doc

* fix codestyle

* fix unittest

* load model in rl
This commit is contained in:
lizhenyun01
2025-08-05 09:52:39 +08:00
committed by GitHub
parent 72ef5a9c93
commit fe540f6caa
15 changed files with 150 additions and 13 deletions

View File

@@ -20,12 +20,23 @@ Assuming you have a custom model class `MyModelForCasualLM` and a pretrained cla
# File: fd_add_dummy_model/__init__.py or fd_add_dummy_model/register.py
from fastdeploy.model_registry import ModelRegistry
from my_custom_model import MyModelForCasualLM, MyPretrainedModel
from fastdeploy.config import ErnieArchitectures
def register():
if "MyModelForCasualLM" not in ModelRegistry.get_supported_archs():
if MyModelForCasualLM.name().startswith("Ernie"):
ErnieArchitectures.register_ernie_model_arch(MyModelForCasualLM)
ModelRegistry.register_model_class(MyModelForCasualLM)
ModelRegistry.register_pretrained_model(MyPretrainedModel)
```
Assuming you have a custom model_runner class `MyModelRunner`, you can write the following registration function:
```python
# File: fd_add_dummy_model_runner/__init__.py
from .my_model_runner import MyModelRunner
def get_runner():
return MyModelRunner
```
#### 2. Register Plugin in `setup.py`
@@ -36,11 +47,14 @@ from setuptools import setup
setup(
name="fastdeploy-plugins",
version="0.1",
packages=["fd_add_dummy_model"],
packages=["fd_add_dummy_model", "fd_add_dummy_model_runner"],
entry_points={
"fastdeploy.model_register_plugins": [
"fd_add_dummy_model = fd_add_dummy_model:register",
],
"fastdeploy.model_runner_plugins": [
"model_runner = fd_add_dummy_model:get_runner"
],
},
)
```

View File

@@ -63,6 +63,11 @@ class ErnieArchitectures:
"Ernie4_5_VLMoeForConditionalGeneration",
}
@classmethod
def register_ernie_model_arch(cls, model_class):
if model_class.name().startswith("Ernie") and model_class.name() not in cls.ARCHITECTURES:
cls.ARCHITECTURES.add(model_class.name())
@classmethod
def contains_ernie_arch(cls, architectures):
"""Check if any ERNIE architecture is present in the given architectures."""

View File

@@ -28,6 +28,7 @@ from tqdm import tqdm
from fastdeploy.engine.args_utils import EngineArgs
from fastdeploy.engine.engine import LLMEngine
from fastdeploy.engine.sampling_params import SamplingParams
from fastdeploy.plugins.model_register import load_model_register_plugins
from fastdeploy.utils import (
deprecated_kwargs_warning,
llm_logger,
@@ -76,6 +77,7 @@ class LLM:
):
deprecated_kwargs_warning(**kwargs)
load_model_register_plugins()
model = retrive_model_from_server(model, revision)
engine_args = EngineArgs(
model=model,

View File

@@ -46,6 +46,7 @@ from fastdeploy.metrics.metrics import (
main_process_metrics,
)
from fastdeploy.metrics.trace_util import inject_to_metadata, instrument
from fastdeploy.plugins.model_register import load_model_register_plugins
from fastdeploy.utils import (
FlexibleArgumentParser,
api_server_logger,
@@ -393,6 +394,7 @@ def launch_controller_server():
def main():
"""main函数"""
load_model_register_plugins()
if load_engine() is None:
return

View File

@@ -1,7 +1,7 @@
"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#

View File

@@ -48,6 +48,7 @@ class Attention(nn.Layer):
linear_shift: paddle.Tensor = None,
linear_smooth: paddle.Tensor = None,
use_neox_rotary_style: bool = False,
use_qk_norm: bool = False,
) -> None:
"""
Initializes `LMLayer` with the given parameters.

View File

@@ -64,9 +64,9 @@ class ModelRegistry:
@classmethod
def get_supported_archs(cls):
assert len(cls._arch_to_model_cls) == len(
cls._arch_to_model_cls
), "model class / pretrained model registry num is not same"
assert len(cls._arch_to_model_cls) >= len(
cls._arch_to_pretrained_model_cls
), "model class num is more than pretrained model registry num"
return [key for key in cls._arch_to_model_cls.keys()]

View File

@@ -28,5 +28,5 @@ def load_model_runner_plugins():
plugins_loaded = True
plugins = load_plugins_by_group(group=PLUGINS_GROUP)
assert len(plugins) == 1, "Only one plugin is allowed to be loaded."
return next(iter(plugins.values()))
assert len(plugins) <= 1, "Most one plugin is allowed to be loaded."
return next(iter(plugins.values()))()

View File

@@ -56,6 +56,9 @@ class RolloutModel(nn.Layer):
def _init_model(self) -> nn.Layer:
"""Load model from loader based on config."""
context = paddle.LazyGuard()
from fastdeploy.plugins.model_register import load_model_register_plugins
load_model_register_plugins()
architectures = f"{self.fd_config.model_config.architectures[0]}RL"
with context:
model_cls = ModelRegistry.get_class(architectures)

View File

@@ -26,13 +26,19 @@ from fastdeploy import envs
from fastdeploy.config import FDConfig
from fastdeploy.engine.request import Request
from fastdeploy.platforms import current_platform
from fastdeploy.plugins.model_runner import load_model_runner_plugins
from fastdeploy.utils import get_logger
from fastdeploy.worker.gpu_model_runner import GPUModelRunner
from fastdeploy.worker.model_runner_base import ModelRunnerBase
from fastdeploy.worker.output import ModelRunnerOutput
from fastdeploy.worker.worker_base import WorkerBase
logger = get_logger("gpu_worker", "gpu_worker.log")
try:
ModelRunner = load_model_runner_plugins()
except:
from fastdeploy.worker.gpu_model_runner import GPUModelRunner as ModelRunner
class GpuWorker(WorkerBase):
def __init__(
@@ -70,7 +76,7 @@ class GpuWorker(WorkerBase):
raise RuntimeError(f"Not support device type: {self.device_config.device}")
# Construct model runner
self.model_runner: GPUModelRunner = GPUModelRunner(
self.model_runner: ModelRunnerBase = ModelRunner(
fd_config=self.fd_config,
device=self.device,
device_id=self.device_ids[self.local_rank % self.max_chips_per_node],

View File

@@ -1,6 +1,21 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddleformers.transformers import PretrainedModel
from fastdeploy import ModelRegistry
from fastdeploy.config import ErnieArchitectures
from fastdeploy.model_executor.models.model_base import ModelForCasualLM
@@ -31,5 +46,7 @@ class MyModelForCasualLM(ModelForCasualLM):
def register():
if "MyModelForCasualLM" not in ModelRegistry.get_supported_archs():
if MyModelForCasualLM.name().startswith("Ernie"):
ErnieArchitectures.register_ernie_model_arch(MyModelForCasualLM)
ModelRegistry.register_model_class(MyModelForCasualLM)
ModelRegistry.register_pretrained_model(MyPretrainedModel)

View File

@@ -0,0 +1,26 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class MyModelRunner:
def __init__(self, rank=0) -> None:
super().__init__()
self.rank = rank
def get_rank(self):
return self.rank
def get_runner():
return MyModelRunner

View File

@@ -1,15 +1,27 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from setuptools import setup
setup(
name="fastdeploy-plugins",
version="0.1",
packages=["fd_add_dummy_model"],
packages=["fd_add_dummy_model", "fd_add_dummy_model_runner"],
entry_points={
"fastdeploy.model_register_plugins": [
"fd_add_dummy_model = fd_add_dummy_model:register",
],
# 'fastdeploy.model_runner_plugins': [
# "model_runner = model_runner:get_runner"
# ]
"fastdeploy.model_runner_plugins": ["fd_add_dummy_model_runner = fd_add_dummy_model_runner:get_runner"],
},
)

View File

@@ -1,3 +1,17 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from fastdeploy import ModelRegistry

View File

@@ -0,0 +1,35 @@
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from fastdeploy.plugins import load_model_runner_plugins
class TestModelRunnerRegistryPlugins(unittest.TestCase):
def test_model_runner_callable(self):
runner_class = load_model_runner_plugins()
device_id = 1
# create runner
runner = runner_class(device_id)
# test func
res = runner.get_rank()
self.assertEqual(res, device_id)
if __name__ == "__main__":
unittest.main()