diff --git a/fastdeploy/model_executor/models/qwen3.py b/fastdeploy/model_executor/models/qwen3.py
index c1654f414..ef0ef9a9c 100644
--- a/fastdeploy/model_executor/models/qwen3.py
+++ b/fastdeploy/model_executor/models/qwen3.py
@@ -164,7 +164,6 @@ class Qwen3Model(nn.Layer):
         self.num_layers = fd_config.model_config.num_layers
         fd_config.model_config.prefix_name = "model"
-        fd_config.model_config.tie_word_embeddings = True
 
         self.embeddings = VocabParallelEmbedding(
             fd_config=fd_config,
@@ -240,14 +239,13 @@ class Qwen3ForCausalLM(ModelForCasualLM):
         self.model = Qwen3Model(fd_config=fd_config)
 
         self.ori_vocab_size = fd_config.model_config.ori_vocab_size
-
+        self.tie_word_embeddings = fd_config.model_config.tie_word_embeddings
         self.lm_head = ParallelLMHead(
             fd_config=fd_config,
             embedding_dim=fd_config.model_config.hidden_size,
             num_embeddings=fd_config.model_config.vocab_size,
-            prefix=(f"{fd_config.model_config.prefix_name}.embed_tokens"),
+            prefix="lm_head",
         )
-        self.tie_word_embeddings = fd_config.model_config.tie_word_embeddings
 
     @classmethod
     def name(self):
@@ -269,7 +267,8 @@ class Qwen3ForCausalLM(ModelForCasualLM):
         if self.tie_word_embeddings:
             self.lm_head.out_linear.weight.set_value(
                 self.model.embeddings.word_embeddings.weight.transpose([1, 0]))
-        self.lm_head.load_state_dict(state_dict)
+        else:
+            self.lm_head.load_state_dict(state_dict)
 
     def compute_logits(self, hidden_states: paddle.Tensor):
         """
@@ -324,6 +323,7 @@ class Qwen3PretrainedModel(PretrainedModel):
 
         base_actions = {
             # Row Linear
+            "lm_head.weight": partial(fn, is_column=True),
             "embed_tokens.weight": partial(fn, is_column=False),
             "layers.0.self_attn.o_proj.weight": partial(fn, is_column=False),
diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py
index 0591b7b91..fef56089b 100644
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -577,12 +577,15 @@ def initialize_fd_config(config_or_args) -> FDConfig:
 
     # Get model config from model directory
     model_config_dict, _ = ModelConfig.get_config_dict(config_or_args.model_name_or_path)
+
+    # Handle MoE related configs
    if 'num_experts' in model_config_dict:
        model_config_dict['moe_num_experts'] = model_config_dict.pop('num_experts')
 
    if 'num_experts_per_tok' in model_config_dict:
        model_config_dict['moe_topk'] = model_config_dict.pop('num_experts_per_tok')
 
+    # Set default values for model config
     model_config_dict["head_dim"] = model_config_dict.get(
         "head_dim", model_config_dict["hidden_size"] // model_config_dict["num_attention_heads"])
@@ -592,6 +595,8 @@ def initialize_fd_config(config_or_args) -> FDConfig:
     model_config = ModelConfig.from_dict(model_config_dict)
     model_config.head_dim = model_config_dict["head_dim"]
     paddle.set_default_dtype(config_or_args.dtype)
+    if 'tie_word_embeddings' in model_config_dict:
+        model_config.tie_word_embeddings = model_config_dict.pop('tie_word_embeddings')
 
     # Initialize all config components
     device_config = DeviceConfig()
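
For context, a minimal sketch of the weight-tying fallback that the `set_state_dict` change above implements: when `tie_word_embeddings` is enabled, the LM head reuses the transposed token-embedding table, and only in the untied case is a dedicated `lm_head` weight loaded from the checkpoint. Class and key names here (`TinyLMHead`, the `state_dict` entry) are illustrative stand-ins, not the FastDeploy APIs:

```python
import paddle

# Illustrative stand-in for ParallelLMHead; not the FastDeploy class.
class TinyLMHead(paddle.nn.Layer):
    def __init__(self, hidden_size: int, vocab_size: int):
        super().__init__()
        self.out_linear = paddle.nn.Linear(hidden_size, vocab_size)


vocab_size, hidden_size = 8, 4
embeddings = paddle.nn.Embedding(vocab_size, hidden_size)  # weight: [vocab, hidden]
lm_head = TinyLMHead(hidden_size, vocab_size)              # weight: [hidden, vocab]

tie_word_embeddings = True
# Hypothetical checkpoint entry for the untied case.
state_dict = {"lm_head.out_linear.weight": paddle.randn([hidden_size, vocab_size])}

if tie_word_embeddings:
    # Tied: reuse the [vocab, hidden] embedding table as the [hidden, vocab] projection.
    lm_head.out_linear.weight.set_value(embeddings.weight.transpose([1, 0]))
else:
    # Untied: load the dedicated lm_head weight from the checkpoint.
    lm_head.out_linear.weight.set_value(state_dict["lm_head.out_linear.weight"])
```

This is also why the head's prefix changes from `model.embed_tokens` to `lm_head` above: with tying enabled there is no separate head weight in the checkpoint to load, so the head is only populated from `state_dict` in the untied branch.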