FastDeploy/fastdeploy/model_executor/layers/activation.py

"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

from typing import Optional

import paddle
from paddle import nn
from paddle.incubate.nn.functional import fused_bias_act, swiglu

from fastdeploy.config import FDConfig
from fastdeploy.platforms import current_platform


class SiluAndMul(nn.Layer):
"""
SiluAndMul Layer
"""
def __init__(
self,
fd_config: FDConfig,
bias: paddle.Tensor = None,
act_method: str = "gelu",
dequant_scales: Optional[paddle.Tensor] = None,
shift: Optional[paddle.Tensor] = None,
smooth: Optional[paddle.Tensor] = None,
quant_scale: float = -1,
):
"""
Initialize the activation layer with optional parameters for quantization, bias,
activation method, and more.
Args:
fd_config (Any): Arguments related to inference, including quantization
settings.
bias (Optional[Tensor]): Optional bias term to be added to the output.
act_method (str): Activation method to be applied. Defaults to "gelu".
dequant_scales (Optional[Tensor]): Dequantization scales, used in
quantization scenarios.
shift (Optional[Tensor]): Shift factor, used in quantization scenarios.
smooth (Optional[Tensor]): Smoothing factor, used for specific activation
functions.
quant_scale (float, optional): Quantization scale, used in quantization
scenarios. Defaults to -1, indicating no quantization.
Raises:
ValueError: If the default data type is not supported (only float32, float16,
and bfloat16 are supported).
"""
        super().__init__()
        if (
            current_platform.is_cuda()
            or current_platform.is_xpu()
            or current_platform.is_iluvatar()
            or current_platform.is_dcu()
            or current_platform.is_maca()
        ):
            self.forward = self.forward_cuda
        elif current_platform.is_gcu():
            self.forward = self.forward_gcu
        else:
            raise NotImplementedError

        self.bias = bias
        act_method = act_method.lower()
        # The fused kernel names the gated SiLU variant "swiglu".
        if act_method == "silu":
            act_method = "swiglu"
        self.act_method = act_method
        self.dequant_scales = dequant_scales
        self.shift = shift
        self.smooth = smooth
        self.quant_scale = quant_scale
        self.quant_round_type = fd_config.quant_config.quant_round_type if fd_config.quant_config else 0
        self.quant_max_bound = fd_config.quant_config.quant_max_bound if fd_config.quant_config else 0
        self.quant_min_bound = fd_config.quant_config.quant_min_bound if fd_config.quant_config else 0
        self._dtype = self._helper.get_default_dtype()
        if self._dtype == "bfloat16":
            self._fuse_kernel_compute_dtype = "bf16"
        elif self._dtype == "float16":
            self._fuse_kernel_compute_dtype = "fp16"
        elif self._dtype == "float32":
            self._fuse_kernel_compute_dtype = "fp32"
        else:
            raise ValueError(
                "Only float32, float16 and bfloat16 are supported as the default dtype, "
                f"but received {self._dtype}"
            )

        # fp8 does not support smooth quantization, so drop the smooth-quant inputs.
        if fd_config.quant_config and "fp8" in fd_config.quant_config.name():
            self.dequant_scales = None
            self.shift = None
            self.smooth = None

    def forward_cuda(self, x: paddle.Tensor) -> paddle.Tensor:
        """
        Forward propagation of the custom activation layer.

        Args:
            x (Tensor): Input tensor to the activation layer.

        Returns:
            Tensor: Output tensor.
        """
        return fused_bias_act(
            x,
            bias=self.bias,
            act_method=self.act_method,
            compute_dtype=self._fuse_kernel_compute_dtype,
            dequant_scales=self.dequant_scales,
            shift=self.shift,
            smooth=self.smooth,
            quant_scale=self.quant_scale,
            quant_round_type=self.quant_round_type,
            quant_max_bound=self.quant_max_bound,
            quant_min_bound=self.quant_min_bound,
        )

    def forward_gcu(self, x):
        """
        Forward propagation of the custom activation layer.

        Args:
            x (Tensor): Input tensor to the activation layer.

        Returns:
            Tensor: Output tensor.
        """
        out = swiglu(x)
        if self.bias is not None:
            out = out + self.bias
        return out
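
# Usage sketch (illustrative comment, not part of the upstream API docs):
# SiluAndMul is meant to consume the concatenated [gate, up] output of a fused
# gate/up projection; the swiglu kernel applies SiLU to the gate half and
# multiplies by the up half, so the output's last dimension is half the input's.
# Assuming `fd_config` is an already constructed FDConfig (hypothetical variable):
#
#     act = SiluAndMul(fd_config=fd_config, act_method="silu")
#     hidden = paddle.randn([2, 128, 2 * 4096])  # [batch, seq_len, 2 * intermediate]
#     out = act(hidden)                          # [batch, seq_len, intermediate]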


def get_act_fn(act_fn_name: str) -> nn.Layer:
    """Get an activation function by name."""
    act_fn_name = act_fn_name.lower()

    # Fully-qualified names such as "paddle.nn.layer.activation.ReLU" are reduced
    # to their final component; the prefix check is lower-cased to match the
    # already lower-cased name.
    if act_fn_name.startswith("paddle.nn.layer"):
        activation_name = act_fn_name.split(".")[-1]
        if activation_name == "identity":
            return nn.Identity()
        act_fn_name = activation_name

    activation_map = {
        "gelu": nn.GELU(),
        "relu": nn.ReLU(),
        "silu": nn.Silu(),
        "tanh": nn.Tanh(),
        "sigmoid": nn.Sigmoid(),
    }

    if act_fn_name in activation_map:
        return activation_map[act_fn_name]
    else:
        raise ValueError(f"Activation function {act_fn_name!r} is not supported.")