mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
[Model] Provide clearer error for missing KV cache quantization scales (#3007)
This commit is contained in:
@@ -420,6 +420,7 @@ class DeepseekV3MLAAttention(nn.Layer):
|
|||||||
# NOTE(Ryan): Make sure kv_b_proj_bmm is loaded before kv_b_proj,
|
# NOTE(Ryan): Make sure kv_b_proj_bmm is loaded before kv_b_proj,
|
||||||
# The same weight key will be popped after kv_b_proj.
|
# The same weight key will be popped after kv_b_proj.
|
||||||
self.o_proj.load_state_dict(state_dict)
|
self.o_proj.load_state_dict(state_dict)
|
||||||
|
self.mla_attn.load_state_dict(state_dict)
|
||||||
|
|
||||||
|
|
||||||
class DeepSeekV3DecoderLayer(nn.Layer):
|
class DeepSeekV3DecoderLayer(nn.Layer):
|
||||||
|
@@ -113,6 +113,7 @@ class Qwen2Attention(nn.Layer):
|
|||||||
""" """
|
""" """
|
||||||
self.qkv_proj.load_state_dict(state_dict)
|
self.qkv_proj.load_state_dict(state_dict)
|
||||||
self.o_proj.load_state_dict(state_dict)
|
self.o_proj.load_state_dict(state_dict)
|
||||||
|
self.attn.load_state_dict(state_dict)
|
||||||
|
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
|
@@ -95,6 +95,7 @@ class Qwen3Attention(nn.Layer):
|
|||||||
self.o_proj.load_state_dict(state_dict)
|
self.o_proj.load_state_dict(state_dict)
|
||||||
self.q_norm.load_state_dict(state_dict)
|
self.q_norm.load_state_dict(state_dict)
|
||||||
self.k_norm.load_state_dict(state_dict)
|
self.k_norm.load_state_dict(state_dict)
|
||||||
|
self.attn.load_state_dict(state_dict)
|
||||||
|
|
||||||
def forward(
|
def forward(
|
||||||
self,
|
self,
|
||||||
|
Reference in New Issue
Block a user