[Others] remove add_bias option (#5425)

Haonan Luo committed 2025-12-09 17:39:35 +08:00 (committed by GitHub)
parent 1f63000ef9
commit e397c4fba6
5 changed files with 6 additions and 30 deletions
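In short: LinearBase and its subclasses previously took both `with_bias` (create a bias parameter) and `add_bias` (apply it in this layer rather than in a pre/post layer); after this commit only `with_bias` remains and drives both creation and application. A minimal pure-Python sketch of the resulting behavior (`ToyLinear` is a made-up stand-in for illustration, not a FastDeploy class):

# Illustrative sketch only: `ToyLinear` is not FastDeploy's LinearBase.
class ToyLinear:
    def __init__(self, output_size: int, with_bias: bool = False):
        # A single flag now decides whether the bias exists *and* whether it is applied.
        self.with_bias = with_bias
        self.bias = [0.1] * output_size if with_bias else None

    def forward(self, x):
        # Mirrors the quant methods' new gating: `layer.bias if layer.with_bias else None`.
        bias = self.bias if self.with_bias else None
        return x if bias is None else [xi + bi for xi, bi in zip(x, bias)]

print(ToyLinear(2, with_bias=True).forward([1.0, 2.0]))   # [1.1, 2.1]
print(ToyLinear(2, with_bias=False).forward([1.0, 2.0]))  # [1.0, 2.0]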

@@ -97,7 +97,6 @@ class LinearBase(nn.Layer):
         input_size: int = None,
         output_size: int = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         skip_quant: bool = False,
         weight_dtype: str = "",
         weight_key: str = "",
@@ -112,7 +111,6 @@ class LinearBase(nn.Layer):
             input_size (int): Number of input features. Defaults to None.
             output_size (int): Number of output features. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.

         Raises:
@@ -137,7 +135,6 @@ class LinearBase(nn.Layer):
         self.input_size = input_size
         self.output_size = output_size
         self.with_bias = with_bias
-        self.add_bias = add_bias
         self.prefix = prefix
         self.is_quantized = fd_config.model_config.is_quantized and not (
             fd_config.quant_config.name() == "mix_quant" and fd_config.quant_config.dense_quant_type is None
@@ -270,7 +267,6 @@ class ReplicatedLinear(LinearBase):
         input_size: int = None,
         output_size: int = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         skip_quant: bool = False,
         weight_dtype: str = "",
         weight_key: str = "",
@@ -286,7 +282,6 @@ class ReplicatedLinear(LinearBase):
             input_size (int): Number of input features. Defaults to None.
             output_size (int): Number of output features. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
         super().__init__(
@@ -295,7 +290,6 @@ class ReplicatedLinear(LinearBase):
             input_size=input_size,
             output_size=output_size,
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
             weight_dtype=weight_dtype,
             weight_key=weight_key,
@@ -325,7 +319,6 @@ class MergedReplicatedLinear(ReplicatedLinear):
         input_size: int = None,
         output_sizes: list[int] = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         skip_quant: bool = False,
         weight_dtype: str = "",
         weight_key: str = "",
@@ -339,7 +332,6 @@ class MergedReplicatedLinear(ReplicatedLinear):
             input_size (int): Number of input features. Defaults to None.
             output_sizes (list[int]): Number of output features list. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
         super().__init__(
@@ -348,7 +340,6 @@ class MergedReplicatedLinear(ReplicatedLinear):
             input_size=input_size,
             output_size=sum(output_sizes),
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
             weight_dtype=weight_dtype,
             weight_key=weight_key,
@@ -403,7 +394,6 @@ class ColumnParallelLinear(LinearBase):
         input_size: int = None,
         output_size: int = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         skip_quant: bool = False,
         weight_dtype: str = "",
     ):
@@ -417,7 +407,6 @@ class ColumnParallelLinear(LinearBase):
             input_size (int): Number of input features. Defaults to None.
             output_size (int): Number of output features. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
         self.fd_config = fd_config
@@ -432,7 +421,6 @@ class ColumnParallelLinear(LinearBase):
             input_size=self.input_size,
             output_size=self.output_size,
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
             weight_dtype=weight_dtype,
         )
@@ -480,7 +468,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
         input_size: int = None,
         output_size: int = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         activation: str = "gelu",
         skip_quant: bool = False,
     ):
@@ -494,7 +481,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
             input_size (int): Number of input features. Defaults to None.
             output_size (int): Number of output features. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             activation (str): Activation function to use. Defaults to "gelu".
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
@@ -510,7 +496,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
             input_size=input_size,
             output_size=output_size,
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
         )
@@ -617,7 +602,6 @@ class QKVParallelLinear(ColumnParallelLinear):
         fd_config,
         prefix,
         with_bias=False,
-        add_bias=True,
         num_heads: Optional[int] = None,
         kv_num_heads: Optional[int] = None,
         hidden_size: Optional[int] = None,
@@ -633,7 +617,6 @@ class QKVParallelLinear(ColumnParallelLinear):
             prefix (str): Unique name of the layer, used to name internal attributes.
                 Can be arbitrarily named.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to True.
             num_heads (Optional[int]): Number of attention heads in the model.
             kv_num_heads (Optional[int]): Number of key/value heads, used for multi-query or grouped-query attention.
             hidden_size (Optional[int]): Total hidden layer dimension, typically the embedding size.
@@ -661,7 +644,6 @@ class QKVParallelLinear(ColumnParallelLinear):
             input_size=input_size,
             output_size=output_size,
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
             weight_dtype=weight_dtype,
         )
@@ -831,7 +813,6 @@ class RowParallelLinear(LinearBase):
         input_size: int = None,
         output_size: int = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         reduce_results: bool = True,
         skip_quant: bool = False,
         weight_dtype: str = "",
@@ -847,7 +828,6 @@ class RowParallelLinear(LinearBase):
             input_size (int): Number of input features. Defaults to None.
             output_size (int): Number of output features. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
         self.fd_config = fd_config
@@ -875,7 +855,6 @@ class RowParallelLinear(LinearBase):
             input_size=self.input_size,
             output_size=self.output_size,
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
             weight_dtype=weight_dtype,
         )
@@ -896,10 +875,8 @@ class RowParallelLinear(LinearBase):
         self.reduce_results = reduce_results and not self.split_token
-        if add_bias:
-            assert with_bias, "with_bias must be True when add_bias is True."
-            if self.tp_size > 1 and self.reduce_results:
-                set_weight_attrs(self.bias, {"tp_row_bias": True})
+        if self.with_bias and self.tp_size > 1 and self.reduce_results:
+            set_weight_attrs(self.bias, {"tp_row_bias": True})

     def all2all_transpose(self, x: paddle.Tensor) -> paddle.Tensor:
         token_num = x.shape[0]
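The hunk above also folds the old `add_bias` guard into a single condition: the bias is tagged as a tensor-parallel row bias whenever the layer owns a bias, runs with `tp_size > 1`, and all-reduces its output. A hedged sketch of just that predicate (the helper name is made up for illustration, not a FastDeploy function):

# Hypothetical helper, not part of FastDeploy: isolates the new gating condition.
def needs_tp_row_bias(with_bias: bool, tp_size: int, reduce_results: bool) -> bool:
    # Before this commit the same tagging additionally required add_bias=True.
    return with_bias and tp_size > 1 and reduce_results

assert needs_tp_row_bias(True, 2, True)       # bias present, TP > 1, output reduced -> tag it
assert not needs_tp_row_bias(False, 2, True)  # no bias parameter -> nothing to tag
assert not needs_tp_row_bias(True, 1, True)   # single rank -> no row-parallel bias handling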

@@ -107,7 +107,7 @@ class W4AFP8LinearMethod(QuantMethodBase):
             layer.weight,
             layer.weight_scale,
             zero_points=None,
-            bias=layer.bias if layer.add_bias else None,
+            bias=layer.bias if layer.with_bias else None,
             out_scale=self.quant_config.weight_scale_dict.get(layer.prefix + ".weight_scale")
             / (
                 self.quant_config.act_scale_dict.get(layer.prefix + ".activation_scale")

@@ -361,7 +361,7 @@ class WeightOnlyLinearMethod(QuantMethodBase):
         linear_out = weight_only_linear(
             x,
             weight=layer.weight,
-            bias=layer.bias if layer.add_bias else None,
+            bias=layer.bias if layer.with_bias else None,
             weight_scale=layer.weight_scale,
             weight_dtype=("int8" if self.quant_config.name() == "wint8" else "int4"),
             arch=self.quant_config.weight_only_linear_arch,
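Both quantized apply paths (W4AFP8 above, weight-only here) now select the bias with the same expression. A small runnable sketch of that selection using a stand-in layer object (only the attribute names come from the diff; the stand-in itself is illustrative):

from types import SimpleNamespace

# Stand-in for a quantized linear layer; only the attributes touched by the
# bias-selection expression are modeled here.
layer = SimpleNamespace(bias="bias_tensor", with_bias=True)

# New gating in both quant methods: the bias reaches the fused kernel only when
# the layer actually owns one (with_bias), not via the removed add_bias flag.
assert (layer.bias if layer.with_bias else None) == "bias_tensor"

layer.with_bias = False
assert (layer.bias if layer.with_bias else None) is None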

@@ -67,7 +67,6 @@ class GptOssAttention(nn.Layer):
             input_size=self.num_attention_heads * self.head_dim,
             output_size=self.hidden_size,
             with_bias=True,
-            add_bias=True,
         )
         self.attn = Attention(

@@ -41,7 +41,7 @@ class TestW4AFP8(unittest.TestCase):
         self.layer.weight_shape = [8, 4]
         self.layer.create_parameter.return_value = "created_weight"
         self.layer.bias = "bias"
-        self.layer.add_bias = True
+        self.layer.with_bias = True
         self.layer._dtype = "float16"
         self.layer.prefix = "layer"
@@ -138,7 +138,7 @@ class TestW4AFP8(unittest.TestCase):
     @mock.patch("fastdeploy.model_executor.ops.gpu.scaled_gemm_f8_i4_f16")
     def test_apply_without_bias(self, mock_gemm):
-        self.layer.add_bias = False
+        self.layer.with_bias = False
         mock_gemm.return_value = "out"
         x = "x"
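The test updates follow the same rename: the mocked layer exposes `with_bias` instead of `add_bias`. A self-contained sketch of that pattern with `unittest.mock` (the test class and method names here are illustrative, not the real TestW4AFP8 cases):

import unittest
from unittest import mock

class TestBiasGating(unittest.TestCase):
    """Illustrative only: mirrors how the mocked layer drives bias selection."""

    def setUp(self):
        self.layer = mock.Mock()
        self.layer.bias = "bias"

    def test_bias_passed_when_with_bias(self):
        self.layer.with_bias = True
        self.assertEqual(self.layer.bias if self.layer.with_bias else None, "bias")

    def test_bias_dropped_without_with_bias(self):
        self.layer.with_bias = False
        self.assertIsNone(self.layer.bias if self.layer.with_bias else None)

if __name__ == "__main__":
    unittest.main()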