mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
113 lines
3.4 KiB
Python
113 lines
3.4 KiB
Python
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from typing import ClassVar, Literal, Protocol, Type
|
|
|
|
import paddle
|
|
from paddle import nn
|
|
from typing_extensions import TypeVar, runtime_checkable
|
|
|
|
from fastdeploy.config import FDConfig
|
|
from fastdeploy.model_executor.forward_meta import ForwardMeta
|
|
from fastdeploy.model_executor.layers.pooler import Pooler
|
|
|
|
T = TypeVar("T", default=paddle.Tensor)
|
|
T_co = TypeVar("T_co", default=paddle.Tensor, covariant=True)
|
|
|
|
|
|
def is_text_generation_model(model_cls: Type[nn.Layer]) -> bool:
    """Return whether ``model_cls`` is a subclass of ``ModelForCasualLM``.

    Args:
        model_cls: The model class (not an instance) to inspect.

    Returns:
        The result of ``issubclass(model_cls, ModelForCasualLM)``.
    """
    # Imported at call time rather than at module import time
    # NOTE(review): presumably to avoid a circular import with model_base - confirm.
    from .model_base import ModelForCasualLM

    derives_from_causal_lm = issubclass(model_cls, ModelForCasualLM)
    return derives_from_causal_lm
|
|
|
|
|
|
def is_pooling_model(model_cls: Type[nn.Layer]) -> bool:
    """Report the ``is_pooling_model`` class flag of ``model_cls``.

    Args:
        model_cls: The model class to inspect.

    Returns:
        The value of ``model_cls.is_pooling_model`` when the attribute
        exists, otherwise ``False``.
    """
    pooling_flag = getattr(model_cls, "is_pooling_model", False)
    return pooling_flag
|
|
|
|
|
|
def is_multimodal_model(class_name: str) -> bool:
    """Guess from a class name whether the model is multimodal.

    The check is a case-sensitive substring match against the markers
    ``"VL"``, ``"Vision"`` and ``"ConditionalGeneration"``.

    Args:
        class_name: Name of the model class.

    Returns:
        ``True`` if any marker occurs in ``class_name``, else ``False``.
    """
    for marker in ("VL", "Vision", "ConditionalGeneration"):
        if marker in class_name:
            return True
    return False
|
|
|
|
|
|
def determine_model_category(class_name: str):
    """Map a model class name to a ``ModelCategory`` via substring heuristics.

    The checks run in priority order and the first match wins:

    1. multimodal markers (as decided by ``is_multimodal_model``)
       -> ``ModelCategory.MULTIMODAL``
    2. ``"Embedding"`` or ``"ForSequenceClassification"``
       -> ``ModelCategory.EMBEDDING``
    3. ``"Reward"`` -> ``ModelCategory.REWARD``
    4. anything else -> ``ModelCategory.TEXT_GENERATION``

    Args:
        class_name: Name of the model class.

    Returns:
        The matching ``ModelCategory`` member.
    """
    # Imported at call time rather than at module import time
    # NOTE(review): presumably to avoid a circular import with model_base - confirm.
    from fastdeploy.model_executor.models.model_base import ModelCategory

    # Delegate to the shared helper so the multimodal marker list lives in
    # exactly one place and the two functions cannot drift apart.
    if is_multimodal_model(class_name):
        return ModelCategory.MULTIMODAL
    if any(marker in class_name for marker in ("Embedding", "ForSequenceClassification")):
        return ModelCategory.EMBEDDING
    if "Reward" in class_name:
        return ModelCategory.REWARD
    return ModelCategory.TEXT_GENERATION
|
|
|
|
|
|
def get_default_pooling_type(model_cls: Type[nn.Layer] = None) -> str:
    """Look up the default pooling type declared on a model class.

    Args:
        model_cls: The model class to inspect, or ``None``.

    Returns:
        ``model_cls.default_pooling_type`` when ``model_cls`` is given and
        defines that attribute; ``"LAST"`` otherwise.
    """
    fallback = "LAST"
    return fallback if model_cls is None else getattr(model_cls, "default_pooling_type", fallback)
|
|
|
|
|
|
@runtime_checkable
class FdModel(Protocol[T_co]):
    """Structural interface that every FastDeploy model must provide.

    The protocol is generic over ``T_co``, the type returned by
    ``forward``. It is decorated with ``runtime_checkable``, so it may be
    used with ``isinstance``.
    """

    def __init__(self, fd_config: FDConfig, prefix: str = "") -> None:
        """Construct the model.

        Args:
            fd_config: The FastDeploy configuration object.
            prefix: String prefix for the model; defaults to ``""``.
                NOTE(review): presumably a parameter-name prefix - confirm.
        """
        ...

    def forward(self, ids_remove_padding: paddle.Tensor, forward_metadata: ForwardMeta) -> T_co:
        """Run a forward pass.

        Args:
            ids_remove_padding: Input tensor.
                NOTE(review): presumably token ids with padding stripped - confirm.
            forward_metadata: The ``ForwardMeta`` for this step.

        Returns:
            A value of type ``T_co``.
        """
        ...
|
|
|
|
|
|
class FdModelForPooling(FdModel[T_co], Protocol[T_co]):
    """Structural interface that every FastDeploy pooling model must provide."""

    is_pooling_model: ClassVar[Literal[True]] = True
    """
    Marker flag telling callers that this model supports pooling.

    Note:
        A model class that has this class in its MRO inherits the flag
        and does not need to redefine it.
    """

    default_pooling_type: ClassVar[str] = "LAST"
    """
    The
    [fastdeploy.config.PoolerConfig.pooling_type][]
    used by default for this model.

    The
    [fastdeploy.model_executor.models.interfaces_base.default_pooling_type][]
    decorator is a convenient way to set this field.
    """

    pooler: Pooler
    """The pooler is only called on TP rank 0."""
|
|
|
|
|
|
def default_pooling_type(pooling_type: str):
    """Create a class decorator that sets ``default_pooling_type``.

    Args:
        pooling_type: Value to store on the decorated model.

    Returns:
        A decorator that assigns ``pooling_type`` to the
        ``default_pooling_type`` attribute of the object it receives and
        then returns that same object.
    """

    def _tag(model):
        setattr(model, "default_pooling_type", pooling_type)
        return model

    return _tag
|