[Bug Fix] Fix bug in MLA Attention Backend (#3178)

* fix typo

* fix MLA attention backend
Author: RAM
Date: 2025-08-05 10:53:27 +08:00
Committed by: GitHub
Parent: 9561603ed9
Commit: bd77a3a643


@@ -315,7 +315,7 @@ class DeepseekV3MLAAttention(nn.Layer):
             dtype=layernorm_out.dtype,
         )
-        if forward_meta.max_enc_len_this_time:
+        if forward_meta.max_len_tensor_cpu[1]:  # max_enc_len_this_time
             query = self.q_a_proj(layernorm_out)
             query = self.q_a_layernorm(query)
             query = self.q_b_proj(query)
@@ -362,7 +362,7 @@ class DeepseekV3MLAAttention(nn.Layer):
             fmha_out_prefill = fmha_out_prefill * mask_encoder_batch.cast(fmha_out_prefill.dtype)
             fmha_out = fmha_out + fmha_out_prefill
-        if forward_meta.max_dec_len_this_time:
+        if forward_meta.max_len_tensor_cpu[2]:  # max_dec_len_this_time
             query = self.q_a_proj(layernorm_out)
             query = self.q_a_layernorm(query)
             ln_out_or_q_c = query
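
The patch changes how the prefill and decode branches are gated: instead of reading the max_enc_len_this_time / max_dec_len_this_time attributes directly, the conditions now read slots 1 and 2 of forward_meta.max_len_tensor_cpu, which the inline comments identify as carrying those same values. The following is a minimal, self-contained sketch (not the actual DeepseekV3MLAAttention source) of the resulting branch selection; the ForwardMetaSketch class, the select_branches helper, and the assumption that slot 1 holds the max encoder length and slot 2 the max decoder length for the current step are illustrative only, taken from the comments in the diff above.

from dataclasses import dataclass, field
from typing import List


@dataclass
class ForwardMetaSketch:
    # Hypothetical stand-in for the real forward_meta object.
    # Assumed slot layout (from the diff comments):
    #   index 1 -> max_enc_len_this_time, index 2 -> max_dec_len_this_time.
    max_len_tensor_cpu: List[int] = field(default_factory=lambda: [0, 0, 0])


def select_branches(forward_meta: ForwardMetaSketch) -> List[str]:
    """Return which attention paths would run for this forward step."""
    branches = []
    if forward_meta.max_len_tensor_cpu[1]:  # max_enc_len_this_time > 0 -> prefill path
        branches.append("prefill")
    if forward_meta.max_len_tensor_cpu[2]:  # max_dec_len_this_time > 0 -> decode path
        branches.append("decode")
    return branches


if __name__ == "__main__":
    # Prefill-only step: encoder length nonzero, decoder length zero.
    print(select_branches(ForwardMetaSketch(max_len_tensor_cpu=[8, 8, 0])))  # ['prefill']
    # Decode-only step: encoder length zero, decoder length nonzero.
    print(select_branches(ForwardMetaSketch(max_len_tensor_cpu=[1, 0, 1])))  # ['decode']

The sketch only demonstrates the gating logic the diff settles on: both branches key off the CPU-side length tensor, so a step can enter the prefill path, the decode path, or both, depending on which slots are nonzero.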