[Bug Fix] Fix bug of MLA Attention Backend (#3178)
* fix typo
* fix mla attention backend
@@ -315,7 +315,7 @@ class DeepseekV3MLAAttention(nn.Layer):
             dtype=layernorm_out.dtype,
         )
 
-        if forward_meta.max_enc_len_this_time:
+        if forward_meta.max_len_tensor_cpu[1]:  # max_enc_len_this_time
             query = self.q_a_proj(layernorm_out)
             query = self.q_a_layernorm(query)
             query = self.q_b_proj(query)
@@ -362,7 +362,7 @@ class DeepseekV3MLAAttention(nn.Layer):
             fmha_out_prefill = fmha_out_prefill * mask_encoder_batch.cast(fmha_out_prefill.dtype)
 
             fmha_out = fmha_out + fmha_out_prefill
-        if forward_meta.max_dec_len_this_time:
+        if forward_meta.max_len_tensor_cpu[2]:  # max_dec_len_this_time
             query = self.q_a_proj(layernorm_out)
             query = self.q_a_layernorm(query)
             ln_out_or_q_c = query
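For context, the fix replaces direct reads of forward_meta.max_enc_len_this_time and forward_meta.max_dec_len_this_time with indexed reads from forward_meta.max_len_tensor_cpu, keeping the original names as trailing comments so the slot meaning stays visible. Below is a minimal sketch of that guard pattern, not the FastDeploy implementation: the ForwardMeta stand-in and the slot layout [_, max_enc_len_this_time, max_dec_len_this_time] are assumptions for illustration only, based on the inline comments in the diff.

import paddle


class ForwardMeta:
    """Hypothetical stand-in for FastDeploy's forward metadata object."""

    def __init__(self, max_enc_len_this_time: int, max_dec_len_this_time: int):
        # Assumed slot layout: index 1 = max_enc_len_this_time,
        # index 2 = max_dec_len_this_time (index 0 unused here).
        self.max_len_tensor_cpu = paddle.to_tensor(
            [0, max_enc_len_this_time, max_dec_len_this_time],
            dtype="int32",
            place=paddle.CPUPlace(),
        )


def run_attention(forward_meta: ForwardMeta) -> None:
    # Same branch conditions as in the patched code: a nonzero slot means
    # the corresponding (prefill or decode) path has work this step.
    if forward_meta.max_len_tensor_cpu[1]:  # max_enc_len_this_time
        print("run prefill (encoder) branch")
    if forward_meta.max_len_tensor_cpu[2]:  # max_dec_len_this_time
        print("run decode branch")


run_attention(ForwardMeta(max_enc_len_this_time=128, max_dec_len_this_time=0))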