[Model] Provide clearer error for missing KV cache quantization scales (#3007)

2025-10-04 08:16:42 +08:00 · 2025-07-24 20:15:00 +08:00
parent c40df1802e
commit f37d00e856
3 changed files with 3 additions and 0 deletions
--- a/fastdeploy/model_executor/models/qwen2.py
+++ b/fastdeploy/model_executor/models/qwen2.py
@@ -113,6 +113,7 @@ class Qwen2Attention(nn.Layer):
        """ """
        self.qkv_proj.load_state_dict(state_dict)
        self.o_proj.load_state_dict(state_dict)
+        self.attn.load_state_dict(state_dict)

    def forward(
        self,