mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-04 16:22:57 +08:00)
[Bug fix] Fixed the garbled text issues in Qwen3-8B (#2737)
* fix qwen3.py
* update
* update lm_head tie_word_embeddings
* update tie_word_embeddings
* fix
* fix tie_word_embedding not in config.json

---------

Co-authored-by: lizexu <lizexu@baidu.com>
@@ -164,7 +164,6 @@ class Qwen3Model(nn.Layer):
         self.num_layers = fd_config.model_config.num_layers
         fd_config.model_config.prefix_name = "model"
-        fd_config.model_config.tie_word_embeddings = True

         self.embeddings = VocabParallelEmbedding(
             fd_config=fd_config,
@@ -240,14 +239,13 @@ class Qwen3ForCausalLM(ModelForCasualLM):
         self.model = Qwen3Model(fd_config=fd_config)

         self.ori_vocab_size = fd_config.model_config.ori_vocab_size

+        self.tie_word_embeddings = fd_config.model_config.tie_word_embeddings
         self.lm_head = ParallelLMHead(
             fd_config=fd_config,
             embedding_dim=fd_config.model_config.hidden_size,
             num_embeddings=fd_config.model_config.vocab_size,
-            prefix=(f"{fd_config.model_config.prefix_name}.embed_tokens"),
+            prefix="lm_head",
         )
-        self.tie_word_embeddings = fd_config.model_config.tie_word_embeddings

     @classmethod
     def name(self):
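The prefix passed to ParallelLMHead decides which checkpoint keys the output head is loaded from: the hunk above swaps the `model.embed_tokens`-style prefix (which only makes sense when the head shares the embedding weight) for a plain `"lm_head"` prefix. A minimal sketch of that key selection, assuming the usual HF-style key layout for Qwen3 checkpoints; `pick_lm_head_prefix` is a hypothetical helper for illustration, not FastDeploy API:

def pick_lm_head_prefix(tie_word_embeddings: bool, prefix_name: str = "model") -> str:
    # Tied checkpoints ship no separate lm_head weight, so the head has to
    # reuse the embedding weight; untied checkpoints store "lm_head.weight".
    if tie_word_embeddings:
        return f"{prefix_name}.embed_tokens"
    return "lm_head"

state_dict_keys = ["model.embed_tokens.weight", "lm_head.weight"]
prefix = pick_lm_head_prefix(tie_word_embeddings=False)
assert f"{prefix}.weight" in state_dict_keys   # -> "lm_head.weight"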
@@ -269,6 +267,7 @@ class Qwen3ForCausalLM(ModelForCasualLM):
         if self.tie_word_embeddings:
             self.lm_head.out_linear.weight.set_value(
                 self.model.embeddings.word_embeddings.weight.transpose([1, 0]))
         else:
             self.lm_head.load_state_dict(state_dict)

     def compute_logits(self, hidden_states: paddle.Tensor):
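When the embeddings are tied, the hunk above copies the transposed word-embedding matrix into the output projection instead of loading a separate lm_head weight from the state dict. A minimal standalone sketch of the same idea with plain paddle.nn layers (layer names and sizes here are illustrative, not FastDeploy's):

import paddle
import paddle.nn as nn

vocab_size, hidden_size = 1000, 64
embed = nn.Embedding(vocab_size, hidden_size)             # weight: [vocab_size, hidden_size]
lm_head = nn.Linear(hidden_size, vocab_size, bias_attr=False)  # weight: [hidden_size, vocab_size]

# Tie: the output projection expects [hidden_size, vocab_size], hence transpose([1, 0]).
lm_head.weight.set_value(embed.weight.transpose([1, 0]))

x = paddle.randn([2, hidden_size])
logits = lm_head(x)   # [2, vocab_size], computed with the tied weight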
@@ -324,6 +323,7 @@ class Qwen3PretrainedModel(PretrainedModel):
         base_actions = {
             # Row Linear
             "lm_head.weight": partial(fn, is_column=True),
             "embed_tokens.weight": partial(fn, is_column=False),
             "layers.0.self_attn.o_proj.weight": partial(fn,
                                                          is_column=False),
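base_actions maps parameter names to a split function (`fn`) partially applied with the split direction, so each tensor-parallel rank keeps only its shard of the weight. `fn`'s real signature isn't shown in this hunk; below is a generic sketch of the usual convention, assuming Paddle's [in_features, out_features] weight layout: column-parallel shards the output dimension, row-parallel shards the input dimension.

from functools import partial
import numpy as np

def split_weight(weight, rank, world_size, is_column):
    # Column-parallel: shard the output dimension; row-parallel: the input one.
    axis = 1 if is_column else 0
    return np.split(weight, world_size, axis=axis)[rank]

# rank/world_size are fixed here only to make the sketch runnable.
actions = {
    "lm_head.weight": partial(split_weight, rank=0, world_size=2, is_column=True),
    "embed_tokens.weight": partial(split_weight, rank=0, world_size=2, is_column=False),
}

w = np.zeros((64, 1000))                      # [hidden_size, vocab_size]
print(actions["lm_head.weight"](w).shape)     # (64, 500): vocab dim split across 2 ranks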
@@ -577,12 +577,15 @@ def initialize_fd_config(config_or_args) -> FDConfig:
     # Get model config from model directory
     model_config_dict, _ = ModelConfig.get_config_dict(config_or_args.model_name_or_path)

     # Handle MoE related configs
     if 'num_experts' in model_config_dict:
         model_config_dict['moe_num_experts'] = model_config_dict.pop('num_experts')
     if 'num_experts_per_tok' in model_config_dict:
         model_config_dict['moe_topk'] = model_config_dict.pop('num_experts_per_tok')

     # Set default values for model config
     model_config_dict["head_dim"] = model_config_dict.get(
         "head_dim", model_config_dict["hidden_size"] // model_config_dict["num_attention_heads"])
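The hunk above normalises HF-style config keys (renaming the MoE fields and defaulting head_dim) before ModelConfig is built. The same defaulting on a plain dict, with illustrative numbers rather than values from any specific checkpoint:

cfg = {"hidden_size": 4096, "num_attention_heads": 32, "num_experts": 64}

if "num_experts" in cfg:
    cfg["moe_num_experts"] = cfg.pop("num_experts")

cfg["head_dim"] = cfg.get("head_dim", cfg["hidden_size"] // cfg["num_attention_heads"])
print(cfg["head_dim"])   # 4096 // 32 == 128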
@@ -592,6 +595,8 @@ def initialize_fd_config(config_or_args) -> FDConfig:
     model_config = ModelConfig.from_dict(model_config_dict)
     model_config.head_dim = model_config_dict["head_dim"]
     paddle.set_default_dtype(config_or_args.dtype)
+    if 'tie_word_embeddings' in model_config_dict:
+        model_config_dict['tie_word_embeddings'] = model_config_dict.pop('tie_word_embeddings')

     # Initialize all config components
     device_config = DeviceConfig()
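Together with the earlier removal of the hardcoded `tie_word_embeddings = True`, this makes the flag come from the checkpoint's own config.json. That is what the garbled-output fix hinges on: a Qwen3 checkpoint that does not tie its embeddings must load a real lm_head weight rather than reuse embed_tokens, and forcing the flag to True evidently gave Qwen3-8B the wrong head weight. A minimal sketch of reading the flag straight from config.json; the helper and its fallback are illustrative only (the key may legitimately be absent, as the commit message notes):

import json
import os

def read_tie_word_embeddings(model_dir: str) -> bool:
    # Illustrative helper, not FastDeploy API. The fallback used when the
    # key is missing is a choice made for this sketch.
    with open(os.path.join(model_dir, "config.json"), "r", encoding="utf-8") as f:
        config = json.load(f)
    return bool(config.get("tie_word_embeddings", False))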