"""
|
|
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
|
|
from typing import Optional
|
|
|
|
import paddle
|
|
from paddle import nn
|
|
from paddle.incubate.nn.functional import fused_bias_act, swiglu
|
|
|
|
from fastdeploy.config import FDConfig
|
|
from fastdeploy.platforms import current_platform
|
|
|
|
|
|
class SiluAndMul(nn.Layer):
    """
    SiluAndMul Layer.

    Fused activation layer: on CUDA-like platforms it dispatches to the fused
    bias/activation kernel, and on GCU it falls back to a plain swiglu.
    """

    def __init__(
        self,
        fd_config: FDConfig,
        bias: Optional[paddle.Tensor] = None,
        act_method: str = "gelu",
        dequant_scales: Optional[paddle.Tensor] = None,
        shift: Optional[paddle.Tensor] = None,
        smooth: Optional[paddle.Tensor] = None,
        quant_scale: float = -1,
    ):
        """
        Initialize the activation layer with optional parameters for quantization,
        bias, activation method, and more.

        Args:
            fd_config (FDConfig): Arguments related to inference, including
                quantization settings.
            bias (Optional[Tensor]): Optional bias term to be added to the output.
            act_method (str): Activation method to be applied. Defaults to "gelu".
            dequant_scales (Optional[Tensor]): Dequantization scales, used in
                quantization scenarios.
            shift (Optional[Tensor]): Shift factor, used in quantization scenarios.
            smooth (Optional[Tensor]): Smoothing factor, used for specific
                activation functions.
            quant_scale (float, optional): Quantization scale, used in quantization
                scenarios. Defaults to -1, indicating no quantization.

        Raises:
            ValueError: If the default data type is not supported (only float32,
                float16, and bfloat16 are supported).
        """
        super().__init__()

        if (
            current_platform.is_cuda()
            or current_platform.is_xpu()
            or current_platform.is_iluvatar()
            or current_platform.is_dcu()
            or current_platform.is_maca()
        ):
            self.forward = self.forward_cuda
        elif current_platform.is_gcu():
            self.forward = self.forward_gcu
        else:
            raise NotImplementedError

        self.bias = bias
        act_method = act_method.lower()
        if act_method == "silu":
            act_method = "swiglu"

        self.act_method = act_method
        self.dequant_scales = dequant_scales
        self.shift = shift
        self.smooth = smooth
        self.quant_scale = quant_scale
        self.quant_round_type = fd_config.quant_config.quant_round_type if fd_config.quant_config else 0
        self.quant_max_bound = fd_config.quant_config.quant_max_bound if fd_config.quant_config else 0
        self.quant_min_bound = fd_config.quant_config.quant_min_bound if fd_config.quant_config else 0

        self._dtype = self._helper.get_default_dtype()
        if self._dtype == "bfloat16":
            self._fuse_kernel_compute_dtype = "bf16"
        elif self._dtype == "float16":
            self._fuse_kernel_compute_dtype = "fp16"
        elif self._dtype == "float32":
            self._fuse_kernel_compute_dtype = "fp32"
        else:
            raise ValueError(
                f"Only float32, float16 and bfloat16 are supported as the default dtype, but received {self._dtype}"
            )

        # fp8 does not support smooth quantization
        if fd_config.quant_config and "fp8" in fd_config.quant_config.name():
            self.dequant_scales = None
            self.shift = None
            self.smooth = None

    def forward_cuda(self, x: paddle.Tensor) -> paddle.Tensor:
        """
        Forward propagation of the custom activation layer.

        Args:
            x (Tensor): Input tensor to the activation layer.

        Returns:
            Tensor: Output tensor.
        """
        return fused_bias_act(
            x,
            bias=self.bias,
            act_method=self.act_method,
            compute_dtype=self._fuse_kernel_compute_dtype,
            dequant_scales=self.dequant_scales,
            shift=self.shift,
            smooth=self.smooth,
            quant_scale=self.quant_scale,
            quant_round_type=self.quant_round_type,
            quant_max_bound=self.quant_max_bound,
            quant_min_bound=self.quant_min_bound,
        )

    def forward_gcu(self, x):
        """
        Forward propagation of the custom activation layer.

        Args:
            x (Tensor): Input tensor to the activation layer.

        Returns:
            Tensor: Output tensor.
        """
        out = swiglu(x)
        if self.bias is not None:
            out = out + self.bias
        return out
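
# Illustrative usage sketch (not part of the module): with act_method="silu"
# (mapped to "swiglu" in __init__), the layer consumes the fused gate/up
# projection, i.e. a tensor whose last dimension is 2 * intermediate_size, and
# returns a tensor of width intermediate_size, computing
# silu(x[..., :d]) * x[..., d:]. `fd_config`, `batch`, and `intermediate_size`
# below are assumed placeholders defined elsewhere.
#
#     act = SiluAndMul(fd_config, act_method="silu")
#     gate_up = paddle.randn([batch, 2 * intermediate_size], dtype="bfloat16")
#     hidden = act(gate_up)  # shape: [batch, intermediate_size]
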
def get_act_fn(act_fn_name: str) -> nn.Layer:
    """Get an activation function by name."""
    act_fn_name = act_fn_name.lower()

    # The name was lowercased above, so compare against the lowercase prefix.
    if act_fn_name.startswith("paddle.nn.layer"):
        activation_name = act_fn_name.split(".")[-1]
        if activation_name == "identity":
            return nn.Identity()
        act_fn_name = activation_name

    activation_map = {
        "gelu": nn.GELU(),
        "relu": nn.ReLU(),
        "silu": nn.Silu(),
        "tanh": nn.Tanh(),
        "sigmoid": nn.Sigmoid(),
    }
    if act_fn_name in activation_map:
        return activation_map[act_fn_name]
    else:
        raise ValueError(f"Activation function {act_fn_name!r} is not supported.")
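
# Illustrative usage sketch (not part of the module): lookups are
# case-insensitive, and unknown names raise a ValueError.
#
#     relu = get_act_fn("ReLU")       # returns nn.ReLU()
#     y = relu(paddle.randn([4, 8]))  # applies the activation element-wise
#     get_act_fn("mish")              # raises ValueError: not in activation_map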