From d40a1046def4c03f00d8f0860a835b6abb0c2742 Mon Sep 17 00:00:00 2001
From: lizhenyun01 <1500424927@qq.com>
Date: Mon, 8 Sep 2025 16:20:32 +0800
Subject: [PATCH] [Feature] support rl_tp_degree (#3934)

* [Feature] support rl_tp_degree

* add rl_tp_degree in lmhead

* add rl_tp_degree in bias

* fix split_axis=0 in bias

* fix split_axis in weight

* fix bias rl_tp_degree

* fix bias rl_tp_degree

* change attr to dict

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
---
 fastdeploy/model_executor/layers/embeddings.py |  5 +++++
 fastdeploy/model_executor/layers/linear.py     | 18 +++++++++++++++++-
 fastdeploy/model_executor/layers/lm_head.py    |  9 +++++++++
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/embeddings.py b/fastdeploy/model_executor/layers/embeddings.py
index 377ff19bb..0ac2d0d70 100644
--- a/fastdeploy/model_executor/layers/embeddings.py
+++ b/fastdeploy/model_executor/layers/embeddings.py
@@ -77,6 +77,11 @@ class VocabParallelEmbedding(nn.Layer):
             )
             if self.world_size > 1:
                 set_weight_attrs(self.embeddings.weight, {"output_dim": False})
+                set_weight_attrs(
+                    self.embeddings.weight,
+                    {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}},
+                )
+
         else:
             # column cut embedding
             self.embeddings = nn.Embedding(
diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
index 2c7f9aef3..0d079c90c 100644
--- a/fastdeploy/model_executor/layers/linear.py
+++ b/fastdeploy/model_executor/layers/linear.py
@@ -356,11 +356,21 @@
         )
 
         if self.nranks > 0:
+            _set_var_distributed(self.weight, split_axis=-1)
             if self.with_bias:
                 # col parallel
-                _set_var_distributed(self.bias, split_axis=1)
+                _set_var_distributed(self.bias, split_axis=0)
                 set_weight_attrs(self.bias, {"output_dim": True})
 
+            # set_rl_tp_degree
+            set_weight_attrs(
+                self.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+            )
+            if self.with_bias:
+                set_weight_attrs(
+                    self.bias, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+                )
+
 
 class MergedColumnParallelLinear(ColumnParallelLinear):
     """
@@ -743,6 +753,7 @@
                 model_format=fd_config.model_config.model_format,
             )
         if self.nranks > 0:
+            _set_var_distributed(self.weight, split_axis=0)
             if self.with_bias:
                 # col parallel
                 _set_var_distributed(self.bias, split_axis=0)
@@ -755,6 +766,11 @@
 
         self.reduce_results = reduce_results
 
+        # set_rl_tp_degree
+        set_weight_attrs(
+            self.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+        )
+
     def forward_cuda(self, x: paddle.Tensor) -> paddle.Tensor:
         if self.fd_config.quant_config:
             out = self.quant_method.apply(self, x)
diff --git a/fastdeploy/model_executor/layers/lm_head.py b/fastdeploy/model_executor/layers/lm_head.py
index a62e46d61..b9dc06ab0 100644
--- a/fastdeploy/model_executor/layers/lm_head.py
+++ b/fastdeploy/model_executor/layers/lm_head.py
@@ -94,6 +94,12 @@ class ParallelLMHead(nn.Layer):
                     "model_format": self.fd_config.model_config.model_format,
                 },
             )
+            if self.bias_key is not None:
+                set_weight_attrs(
+                    self.linear.bias,
+                    {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}},
+                )
+
             if self.nranks > 1:
                 set_weight_attrs(self.linear.weight, {"output_dim": True})
         else:
@@ -116,6 +122,9 @@
 
             if self.nranks > 1:
                 set_weight_attrs(self.linear.weight, {"output_dim": False})
+                set_weight_attrs(
+                    self.linear.weight, {"rl_need_attr": {"rl_tp_degree": fd_config.parallel_config.tensor_parallel_size}}
+                )
 
     def load_state_dict(self, state_dict: Dict[str, paddle.Tensor | np.ndarray]):
         """
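Note (not part of the patch above): every hunk follows the same pattern, attaching a nested dict {"rl_need_attr": {"rl_tp_degree": ...}} to a sharded parameter via set_weight_attrs so that a later consumer, such as an RL weight-resync step, can recover the tensor-parallel degree the serving weights were split with. Below is a minimal, self-contained Python sketch of that tag-and-read-back pattern; DummyParam, the local set_weight_attrs stand-in, the hard-coded tensor_parallel_size, and the consumer-side lookup are illustrative assumptions, not FastDeploy's actual implementation.

# Minimal sketch of the attribute-tagging pattern used by this patch (simplified
# stand-ins, not FastDeploy's real helpers): the producer attaches
# {"rl_need_attr": {"rl_tp_degree": ...}} to a parallel layer's weight, and a
# consumer reads it back to learn the TP degree the weight was sharded with.

class DummyParam:
    """Stand-in for a paddle Parameter; metadata is attached directly to it."""
    pass


def set_weight_attrs(param, attrs):
    # Assumed behaviour of the helper called in the diff: copy each key/value
    # from the attrs dict onto the parameter object.
    for key, value in attrs.items():
        setattr(param, key, value)


# Producer side (what ColumnParallelLinear / RowParallelLinear / ParallelLMHead now do):
tensor_parallel_size = 4  # hypothetical value of fd_config.parallel_config.tensor_parallel_size
weight = DummyParam()
set_weight_attrs(weight, {"rl_need_attr": {"rl_tp_degree": tensor_parallel_size}})

# Consumer side (hypothetical RL weight-sync step): recover the TP degree,
# falling back to 1 when the attribute is absent.
rl_tp_degree = getattr(weight, "rl_need_attr", {}).get("rl_tp_degree", 1)
print(rl_tp_degree)  # -> 4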