From 1e9a8e8cef4d6b0f5e46605f32051014710ba191 Mon Sep 17 00:00:00 2001
From: RichardWooSJTU <37864677+RichardWooSJTU@users.noreply.github.com>
Date: Tue, 5 Aug 2025 15:40:24 +0800
Subject: [PATCH] fix lm head bias (#3185)

Co-authored-by: yuanxiaolan
---
 fastdeploy/model_executor/layers/lm_head.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/lm_head.py b/fastdeploy/model_executor/layers/lm_head.py
index 5c1fd3c15..3f8002737 100644
--- a/fastdeploy/model_executor/layers/lm_head.py
+++ b/fastdeploy/model_executor/layers/lm_head.py
@@ -72,6 +72,13 @@ class ParallelLMHead(nn.Layer):
                 dtype=paddle.get_default_dtype(),
                 is_bias=False,
             )
+            if self.bias_key is not None:
+                self.bias = self.create_parameter(
+                    shape=[num_embeddings],
+                    dtype=paddle.get_default_dtype(),
+                    is_bias=True,
+                )
+
         else:
             if self.column_cut:
                 need_gather = True
@@ -107,6 +114,10 @@ class ParallelLMHead(nn.Layer):
 
         if self.use_ep:
             self.weight.set_value(get_tensor(state_dict.pop(self.weight_key)).astype(paddle.get_default_dtype()))
+            if self.bias_key is not None:
+                self.bias.set_value(
+                    get_tensor(state_dict.pop(self.linear_bias_key)).astype(paddle.get_default_dtype())
+                )
         else:
             if self.tie_word_embeddings:
                 self.linear.weight.set_value(
@@ -134,7 +145,10 @@ class ParallelLMHead(nn.Layer):
         """
         logits = input
         if self.use_ep:
-            logits = paddle.matmul(logits, self.weight)
+            if self.linear_bias_key is None:
+                logits = paddle.matmul(logits, self.weight)
+            else:
+                logits = paddle.incubate.nn.functional.fused_linear(logits, self.weight, self.bias)
         else:
             logits = self.linear(logits)
         return logits
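
For context (not part of the commit), below is a minimal sketch of what the changed expert-parallel forward path computes: without a bias key the logits remain a plain matmul, and with a bias key they become x @ W + b, using the same paddle.incubate.nn.functional.fused_linear call the patch adds. Tensor names and sizes here are illustrative assumptions, and fused_linear requires a CUDA build, so the sketch falls back to the equivalent unfused form otherwise.

    import paddle

    # Illustrative shapes (assumptions, not taken from FastDeploy configs).
    hidden_size, vocab_size = 8, 32
    x = paddle.randn([2, hidden_size])                # token hidden states
    weight = paddle.randn([hidden_size, vocab_size])  # lm_head weight
    bias = paddle.randn([vocab_size])                 # lm_head bias

    # No bias key: the pre-existing path, a plain matmul.
    logits_no_bias = paddle.matmul(x, weight)

    # Bias key present: matmul + bias add. The patch uses the fused kernel,
    # which needs a CUDA device; the unfused form produces the same result.
    if paddle.is_compiled_with_cuda():
        logits = paddle.incubate.nn.functional.fused_linear(x, weight, bias)
    else:
        logits = paddle.matmul(x, weight) + bias

    assert logits.shape == [2, vocab_size]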