mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-17 14:11:14 +08:00
[Precision] Change lm_head layer running in float32 (#3596)
* support lm_head fp32 bf16 fp16 * delete print * code check * check * check * code check * check * check
This commit is contained in:
@@ -257,14 +257,14 @@ class Qwen3ForCausalLM(ModelForCasualLM):
|
||||
"""
|
||||
self.model.load_state_dict(state_dict)
|
||||
if self.tie_word_embeddings:
|
||||
self.lm_head.linear.weight.set_value(self.model.embed_tokens.embeddings.weight.transpose([1, 0]))
|
||||
self.lm_head.load_state_dict({self.lm_head.weight_key: self.ernie.embed_tokens.embeddings.weight})
|
||||
else:
|
||||
self.lm_head.load_state_dict(state_dict)
|
||||
|
||||
def compute_logits(self, hidden_states: paddle.Tensor):
|
||||
""" """
|
||||
logits = self.lm_head(hidden_states)
|
||||
logits = paddle.cast(logits, paddle.float32)
|
||||
logits = logits.astype(paddle.float32)
|
||||
logits[:, self.ori_vocab_size :] = -float("inf")
|
||||
|
||||
return logits
|
||||
|
Reference in New Issue
Block a user