@@ -97,7 +97,6 @@ class LinearBase(nn.Layer):
         input_size: int = None,
         output_size: int = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         skip_quant: bool = False,
         weight_dtype: str = "",
         weight_key: str = "",
@@ -112,7 +111,6 @@ class LinearBase(nn.Layer):
             input_size (int): Number of input features. Defaults to None.
             output_size (int): Number of output features. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
 
         Raises:
@@ -137,7 +135,6 @@ class LinearBase(nn.Layer):
         self.input_size = input_size
         self.output_size = output_size
         self.with_bias = with_bias
-        self.add_bias = add_bias
         self.prefix = prefix
         self.is_quantized = fd_config.model_config.is_quantized and not (
             fd_config.quant_config.name() == "mix_quant" and fd_config.quant_config.dense_quant_type is None
@@ -270,7 +267,6 @@ class ReplicatedLinear(LinearBase):
         input_size: int = None,
         output_size: int = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         skip_quant: bool = False,
         weight_dtype: str = "",
         weight_key: str = "",
@@ -286,7 +282,6 @@ class ReplicatedLinear(LinearBase):
             input_size (int): Number of input features. Defaults to None.
             output_size (int): Number of output features. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
         super().__init__(
@@ -295,7 +290,6 @@ class ReplicatedLinear(LinearBase):
             input_size=input_size,
             output_size=output_size,
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
             weight_dtype=weight_dtype,
             weight_key=weight_key,
@@ -325,7 +319,6 @@ class MergedReplicatedLinear(ReplicatedLinear):
         input_size: int = None,
         output_sizes: list[int] = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         skip_quant: bool = False,
         weight_dtype: str = "",
         weight_key: str = "",
@@ -339,7 +332,6 @@ class MergedReplicatedLinear(ReplicatedLinear):
             input_size (int): Number of input features. Defaults to None.
             output_sizes (list[int]): Number of output features list. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
         super().__init__(
@@ -348,7 +340,6 @@ class MergedReplicatedLinear(ReplicatedLinear):
             input_size=input_size,
             output_size=sum(output_sizes),
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
             weight_dtype=weight_dtype,
             weight_key=weight_key,
@@ -403,7 +394,6 @@ class ColumnParallelLinear(LinearBase):
         input_size: int = None,
         output_size: int = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         skip_quant: bool = False,
         weight_dtype: str = "",
     ):
@@ -417,7 +407,6 @@ class ColumnParallelLinear(LinearBase):
             input_size (int): Number of input features. Defaults to None.
             output_size (int): Number of output features. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
         self.fd_config = fd_config
@@ -432,7 +421,6 @@ class ColumnParallelLinear(LinearBase):
             input_size=self.input_size,
             output_size=self.output_size,
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
             weight_dtype=weight_dtype,
         )
@@ -480,7 +468,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
         input_size: int = None,
         output_size: int = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         activation: str = "gelu",
         skip_quant: bool = False,
     ):
@@ -494,7 +481,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
             input_size (int): Number of input features. Defaults to None.
             output_size (int): Number of output features. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             activation (str): Activation function to use. Defaults to "gelu".
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
@@ -510,7 +496,6 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
             input_size=input_size,
             output_size=output_size,
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
         )
 
@@ -617,7 +602,6 @@ class QKVParallelLinear(ColumnParallelLinear):
         fd_config,
         prefix,
         with_bias=False,
-        add_bias=True,
         num_heads: Optional[int] = None,
         kv_num_heads: Optional[int] = None,
         hidden_size: Optional[int] = None,
@@ -633,7 +617,6 @@ class QKVParallelLinear(ColumnParallelLinear):
             prefix (str): Unique name of the layer, used to name internal attributes.
                 Can be arbitrarily named.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to True.
             num_heads (Optional[int]): Number of attention heads in the model.
             kv_num_heads (Optional[int]): Number of key/value heads, used for multi-query or grouped-query attention.
             hidden_size (Optional[int]): Total hidden layer dimension, typically the embedding size.
@@ -661,7 +644,6 @@ class QKVParallelLinear(ColumnParallelLinear):
             input_size=input_size,
             output_size=output_size,
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
             weight_dtype=weight_dtype,
         )
@@ -831,7 +813,6 @@ class RowParallelLinear(LinearBase):
         input_size: int = None,
         output_size: int = None,
         with_bias: bool = False,
-        add_bias: bool = False,
         reduce_results: bool = True,
         skip_quant: bool = False,
         weight_dtype: str = "",
@@ -847,7 +828,6 @@ class RowParallelLinear(LinearBase):
             input_size (int): Number of input features. Defaults to None.
             output_size (int): Number of output features. Defaults to None.
             with_bias (bool): Whether to include bias or not. Defaults to False.
-            add_bias (bool): Whether to add bias in the current layer or in the pre/post layer. Defaults to False.
             skip_quant (bool): Whether to skip quantization. Defaults to False.
         """
         self.fd_config = fd_config
@@ -875,7 +855,6 @@ class RowParallelLinear(LinearBase):
             input_size=self.input_size,
             output_size=self.output_size,
             with_bias=with_bias,
-            add_bias=add_bias,
             skip_quant=skip_quant,
             weight_dtype=weight_dtype,
         )
@@ -896,10 +875,8 @@ class RowParallelLinear(LinearBase):
 
         self.reduce_results = reduce_results and not self.split_token
 
-        if add_bias:
-            assert with_bias, "with_bias must be True when add_bias is True."
-            if self.tp_size > 1 and self.reduce_results:
-                set_weight_attrs(self.bias, {"tp_row_bias": True})
+        if self.with_bias and self.tp_size > 1 and self.reduce_results:
+            set_weight_attrs(self.bias, {"tp_row_bias": True})
 
     def all2all_transpose(self, x: paddle.Tensor) -> paddle.Tensor:
         token_num = x.shape[0]
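
Note on the last hunk: with the `add_bias` parameter gone, the `tp_row_bias` tagging is keyed off `with_bias` alone instead of being gated behind `if add_bias:`. A minimal sketch of the resulting condition, with `should_tag_row_bias` being a hypothetical helper written for illustration (it is not part of FastDeploy):

    def should_tag_row_bias(with_bias: bool, tp_size: int, reduce_results: bool) -> bool:
        # Mirrors the new condition in RowParallelLinear.__init__: the bias is
        # tagged with {"tp_row_bias": True} only when the layer actually owns a
        # bias and its output is all-reduced across a tensor-parallel group,
        # so downstream loading/compute can account for the reduction.
        return with_bias and tp_size > 1 and reduce_results

    # Previously the caller had to pass add_bias=True (which also asserted
    # with_bias=True); the same tagging now follows from with_bias directly:
    assert should_tag_row_bias(with_bias=True, tp_size=8, reduce_results=True)
    assert not should_tag_row_bias(with_bias=False, tp_size=8, reduce_results=True)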