diff --git a/fastdeploy/model_executor/models/deepseek_v3.py b/fastdeploy/model_executor/models/deepseek_v3.py index e4b44f477..8cbd4a0bd 100644 --- a/fastdeploy/model_executor/models/deepseek_v3.py +++ b/fastdeploy/model_executor/models/deepseek_v3.py @@ -315,7 +315,7 @@ class DeepseekV3MLAAttention(nn.Layer): dtype=layernorm_out.dtype, ) - if forward_meta.max_enc_len_this_time: + if forward_meta.max_len_tensor_cpu[1]: # max_enc_len_this_time query = self.q_a_proj(layernorm_out) query = self.q_a_layernorm(query) query = self.q_b_proj(query) @@ -362,7 +362,7 @@ class DeepseekV3MLAAttention(nn.Layer): fmha_out_prefill = fmha_out_prefill * mask_encoder_batch.cast(fmha_out_prefill.dtype) fmha_out = fmha_out + fmha_out_prefill - if forward_meta.max_dec_len_this_time: + if forward_meta.max_len_tensor_cpu[2]: # max_dec_len_this_time query = self.q_a_proj(layernorm_out) query = self.q_a_layernorm(query) ln_out_or_q_c = query