Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 08:37:06 +08:00
refactor rl get_name_mappings_to_training (#2847)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* refactor rl get_name_mappings_to_training
* fix tp > 1
* rename variables (ffn1 -> up_gate_proj, ffn2 -> down_proj)
* rename variables (linear_weight -> weight, linear_bias -> bias)
* add rl name mappings for vl
* fix ernie 0.3B error
* fix develop code
* fix
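For context, here is a minimal, hypothetical sketch of the kind of inference-to-training name mapping this commit standardizes. The function body and parameter paths are illustrative assumptions, not the actual FastDeploy implementation:

# Hypothetical sketch only: illustrates the ffn1 -> up_gate_proj and
# ffn2 -> down_proj renames from this commit; the layer paths are assumptions.
def get_name_mappings_to_training(num_layers: int) -> dict:
    """Map inference-side parameter names to training-side names."""
    mappings = {}
    for i in range(num_layers):
        prefix = f"model.layers.{i}.mlp"
        # old inference-side names -> new unified names
        mappings[f"{prefix}.ffn1.weight"] = f"{prefix}.up_gate_proj.weight"
        mappings[f"{prefix}.ffn2.weight"] = f"{prefix}.down_proj.weight"
    return mappings

print(get_name_mappings_to_training(2))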
@@ -166,7 +166,7 @@ class Qwen3Model(nn.Layer):
         self.num_layers = fd_config.model_config.num_hidden_layers
         fd_config.model_config.pretrained_config.prefix_name = "model"
 
-        self.embeddings = VocabParallelEmbedding(
+        self.embed_tokens = VocabParallelEmbedding(
             fd_config=fd_config,
             num_embeddings=fd_config.model_config.vocab_size,
             embedding_dim=fd_config.model_config.hidden_size,
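The rename from self.embeddings to self.embed_tokens matters because, in PaddlePaddle, the attribute name becomes part of the state-dict key. A minimal sketch using only stock paddle.nn layers (the Toy class is hypothetical):

import paddle.nn as nn

# Hypothetical toy module: the attribute name "embed_tokens" appears
# verbatim in the state-dict key, so renaming the attribute changes
# which checkpoint keys the module matches.
class Toy(nn.Layer):
    def __init__(self):
        super().__init__()
        self.embed_tokens = nn.Embedding(100, 16)

print(list(Toy().state_dict().keys()))  # ['embed_tokens.weight']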
@@ -197,7 +197,7 @@ class Qwen3Model(nn.Layer):
                 A dictionary containing model parameters, where keys are parameter names
                 and values are NumPy arrays or PaddlePaddle tensors.
         """
-        self.embeddings.load_state_dict(state_dict)
+        self.embed_tokens.load_state_dict(state_dict)
         self.norm.load_state_dict(state_dict)
         for i in range(self.num_layers):
            logger.info(f"Start load layer {i}")
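The load path above follows a simple delegation pattern: the composite model hands the full state dict to each submodule, which picks out its own keys. A sketch under that assumption (the per-layer call is guessed, since the diff context ends at the logging line):

# Illustrative delegation pattern; self.layers[i].load_state_dict is an
# assumption about what follows the truncated diff context.
def load_state_dict(self, state_dict):
    self.embed_tokens.load_state_dict(state_dict)
    self.norm.load_state_dict(state_dict)
    for i in range(self.num_layers):
        logger.info(f"Start load layer {i}")
        self.layers[i].load_state_dict(state_dict)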
@@ -210,7 +210,7 @@ class Qwen3Model(nn.Layer):
     ):
         """
         """
-        hidden_states = self.embeddings(ids_remove_padding=ids_remove_padding)
+        hidden_states = self.embed_tokens(ids_remove_padding=ids_remove_padding)
 
         residual = None
 
@@ -266,8 +266,8 @@ class Qwen3ForCausalLM(ModelForCasualLM):
         """
         self.model.load_state_dict(state_dict)
         if self.tie_word_embeddings:
-            self.lm_head.out_linear.weight.set_value(
-                self.model.embeddings.word_embeddings.weight.transpose([1, 0]))
+            self.lm_head.linear.weight.set_value(
+                self.model.embed_tokens.embeddings.weight.transpose([1, 0]))
         else:
             self.lm_head.load_state_dict(state_dict)
 
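As a sanity check on the tying step above: with tie_word_embeddings set, the lm_head projection reuses the token-embedding matrix, transposed from [vocab_size, hidden_size] to [hidden_size, vocab_size]. A minimal sketch with made-up sizes:

import paddle

# Made-up sizes; demonstrates only the transpose([1, 0]) used in the diff.
vocab_size, hidden_size = 32, 8
embed_weight = paddle.randn([vocab_size, hidden_size])
lm_head_weight = embed_weight.transpose([1, 0])
assert lm_head_weight.shape == [hidden_size, vocab_size]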