diff --git a/fastdeploy/model_executor/models/qwen3.py b/fastdeploy/model_executor/models/qwen3.py
index c1654f414..ef0ef9a9c 100644
--- a/fastdeploy/model_executor/models/qwen3.py
+++ b/fastdeploy/model_executor/models/qwen3.py
@@ -164,7 +164,6 @@ class Qwen3Model(nn.Layer):
         self.num_layers = fd_config.model_config.num_layers
         fd_config.model_config.prefix_name = "model"
-        fd_config.model_config.tie_word_embeddings = True
 
         self.embeddings = VocabParallelEmbedding(
             fd_config=fd_config,
@@ -240,14 +239,13 @@ class Qwen3ForCausalLM(ModelForCasualLM):
         self.model = Qwen3Model(fd_config=fd_config)
 
         self.ori_vocab_size = fd_config.model_config.ori_vocab_size
-
+        self.tie_word_embeddings = fd_config.model_config.tie_word_embeddings
         self.lm_head = ParallelLMHead(
             fd_config=fd_config,
             embedding_dim=fd_config.model_config.hidden_size,
             num_embeddings=fd_config.model_config.vocab_size,
-            prefix=(f"{fd_config.model_config.prefix_name}.embed_tokens"),
+            prefix="lm_head",
         )
-        self.tie_word_embeddings = fd_config.model_config.tie_word_embeddings
 
     @classmethod
     def name(self):
@@ -269,7 +267,8 @@ class Qwen3ForCausalLM(ModelForCasualLM):
         if self.tie_word_embeddings:
             self.lm_head.out_linear.weight.set_value(
                 self.model.embeddings.word_embeddings.weight.transpose([1, 0]))
-        self.lm_head.load_state_dict(state_dict)
+        else:
+            self.lm_head.load_state_dict(state_dict)
 
     def compute_logits(self, hidden_states: paddle.Tensor):
         """
@@ -324,6 +323,7 @@ class Qwen3PretrainedModel(PretrainedModel):
 
         base_actions = {
             # Row Linear
+            "lm_head.weight": partial(fn, is_column=True),
             "embed_tokens.weight": partial(fn, is_column=False),
             "layers.0.self_attn.o_proj.weight": partial(fn, is_column=False),
diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py
index 0591b7b91..fef56089b 100644
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -577,12 +577,15 @@ def initialize_fd_config(config_or_args) -> FDConfig:
 
     # Get model config from model directory
     model_config_dict, _ = ModelConfig.get_config_dict(config_or_args.model_name_or_path)
+
+    # Handle MoE related configs
    if 'num_experts' in model_config_dict:
        model_config_dict['moe_num_experts'] = model_config_dict.pop('num_experts')
 
    if 'num_experts_per_tok' in model_config_dict:
        model_config_dict['moe_topk'] = model_config_dict.pop('num_experts_per_tok')
 
+    # Set default values for model config
     model_config_dict["head_dim"] = model_config_dict.get(
         "head_dim", model_config_dict["hidden_size"] // model_config_dict["num_attention_heads"])
@@ -592,6 +595,8 @@ def initialize_fd_config(config_or_args) -> FDConfig:
     model_config = ModelConfig.from_dict(model_config_dict)
     model_config.head_dim = model_config_dict["head_dim"]
     paddle.set_default_dtype(config_or_args.dtype)
+    if 'tie_word_embeddings' in model_config_dict:
+        model_config.tie_word_embeddings = model_config_dict.pop('tie_word_embeddings')
 
     # Initialize all config components
     device_config = DeviceConfig()
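
For context, a minimal sketch of the weight-tying fallback that the `set_state_dict` change above implements: when `tie_word_embeddings` is enabled, the LM head reuses the transposed token-embedding table, and only in the untied case is a dedicated `lm_head` weight loaded from the checkpoint. Class and key names here (`TinyLMHead`, the `state_dict` entry) are illustrative stand-ins, not the FastDeploy APIs:

```python
import paddle

# Illustrative stand-in for ParallelLMHead; not the FastDeploy class.
class TinyLMHead(paddle.nn.Layer):
    def __init__(self, hidden_size: int, vocab_size: int):
        super().__init__()
        self.out_linear = paddle.nn.Linear(hidden_size, vocab_size)


vocab_size, hidden_size = 8, 4
embeddings = paddle.nn.Embedding(vocab_size, hidden_size)  # weight: [vocab, hidden]
lm_head = TinyLMHead(hidden_size, vocab_size)              # weight: [hidden, vocab]

tie_word_embeddings = True
# Hypothetical checkpoint entry for the untied case.
state_dict = {"lm_head.out_linear.weight": paddle.randn([hidden_size, vocab_size])}

if tie_word_embeddings:
    # Tied: reuse the [vocab, hidden] embedding table as the [hidden, vocab] projection.
    lm_head.out_linear.weight.set_value(embeddings.weight.transpose([1, 0]))
else:
    # Untied: load the dedicated lm_head weight from the checkpoint.
    lm_head.out_linear.weight.set_value(state_dict["lm_head.out_linear.weight"])
```

This is also why the head's prefix changes from `model.embed_tokens` to `lm_head` above: with tying enabled there is no separate head weight in the checkpoint to load, so the head is only populated from `state_dict` in the untied branch.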