[Model] Provide clearer error for missing KV cache quantization scales (#3007)

Author: littledgg
Date: 2025-07-24 20:15:00 +08:00
Committed by: GitHub
Parent: c40df1802e
Commit: f37d00e856

3 changed files with 3 additions and 0 deletions


@@ -420,6 +420,7 @@ class DeepseekV3MLAAttention(nn.Layer):
         # NOTE(Ryan):Make sure kv_b_proj_bmm loaded before kv_b_proj,
         # The same weight key will be poped after kv_b_proj.
         self.o_proj.load_state_dict(state_dict)
+        self.mla_attn.load_state_dict(state_dict)


 class DeepSeekV3DecoderLayer(nn.Layer):
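The NOTE in this hunk is about ordering: modules that merely read a shared checkpoint key must load before the module that pops it. A toy illustration of that pop-then-miss hazard follows; the key name and loader functions are made up for this example and are not FastDeploy code.

# Toy illustration of the ordering hazard the NOTE describes; the key
# name and loader functions are hypothetical.
state_dict = {"kv_b_proj.weight": "W"}

def load_kv_b_proj_bmm(sd):
    # Reads the shared key but leaves it in the dict.
    return sd["kv_b_proj.weight"]

def load_kv_b_proj(sd):
    # Consumes (pops) the shared key.
    return sd.pop("kv_b_proj.weight")

load_kv_b_proj_bmm(state_dict)  # must run first, while the key still exists
load_kv_b_proj(state_dict)      # pops the key afterwards
# Swapping the two calls would raise KeyError in load_kv_b_proj_bmm.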


@@ -113,6 +113,7 @@ class Qwen2Attention(nn.Layer):
""" """ """ """
self.qkv_proj.load_state_dict(state_dict) self.qkv_proj.load_state_dict(state_dict)
self.o_proj.load_state_dict(state_dict) self.o_proj.load_state_dict(state_dict)
self.attn.load_state_dict(state_dict)
def forward( def forward(
self, self,


@@ -95,6 +95,7 @@ class Qwen3Attention(nn.Layer):
         self.o_proj.load_state_dict(state_dict)
         self.q_norm.load_state_dict(state_dict)
         self.k_norm.load_state_dict(state_dict)
+        self.attn.load_state_dict(state_dict)

     def forward(
         self,
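
All three one-line additions route the checkpoint through the attention layer's own load_state_dict, which is where per-layer KV cache quantization scales are consumed, so a checkpoint that lacks scales can now fail at load time with an explicit message. Below is a minimal sketch of such a check; the key format, attribute names, and error text are illustrative assumptions, not FastDeploy's actual implementation.

# Illustrative sketch only: key names, attributes, and the message are
# assumed, not taken from FastDeploy.
class Attention:
    def __init__(self, layer_idx, kv_cache_quant_type=None):
        self.layer_idx = layer_idx
        self.kv_cache_quant_type = kv_cache_quant_type  # e.g. "int8"
        self.cache_k_scale = None
        self.cache_v_scale = None

    def load_state_dict(self, state_dict):
        # Only a quantized KV cache needs calibration scales.
        if self.kv_cache_quant_type is None:
            return
        for name in ("cache_k_scale", "cache_v_scale"):
            key = f"layers.{self.layer_idx}.attn.{name}"  # hypothetical key
            if key not in state_dict:
                # The clearer error this commit targets: name the missing
                # key and the quantization mode up front, instead of failing
                # later with an opaque KeyError or a shape mismatch.
                raise ValueError(
                    f"KV cache quantization ({self.kv_cache_quant_type}) is "
                    f"enabled, but the checkpoint has no scale tensor "
                    f"'{key}'. Export calibration scales or disable KV "
                    f"cache quantization."
                )
            setattr(self, name, state_dict.pop(key))

With the calls added in this commit, Qwen2Attention, Qwen3Attention, and DeepseekV3MLAAttention all forward the state dict to their attention layer during weight loading, so a check of this kind runs once per layer for every supported model.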