diff --git a/fastdeploy/model_executor/layers/attention/flash_attn_backend.py b/fastdeploy/model_executor/layers/attention/flash_attn_backend.py
index 306164635..199a26db8 100644
--- a/fastdeploy/model_executor/layers/attention/flash_attn_backend.py
+++ b/fastdeploy/model_executor/layers/attention/flash_attn_backend.py
@@ -208,7 +208,7 @@ class FlashAttentionBackend(AttentionBackend):
         ) = pre_cache_len_concat(
             forward_meta.seq_lens_decoder,
             forward_meta.seq_lens_this_time,
-            metadata.set_max_lengths[2],
+            forward_meta.max_len_tensor_cpu[2],
             self.block_size,
         )