From 172e69fe172b6ae3bd2d656dbeb80b9a28106ff4 Mon Sep 17 00:00:00 2001
From: chen <103103266+ckl117@users.noreply.github.com>
Date: Wed, 23 Jul 2025 19:07:43 +0800
Subject: [PATCH] FA3 fix bug (#2987)

---
 .../model_executor/layers/attention/flash_attn_backend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/attention/flash_attn_backend.py b/fastdeploy/model_executor/layers/attention/flash_attn_backend.py
index 1de39507a..8290e3986 100644
--- a/fastdeploy/model_executor/layers/attention/flash_attn_backend.py
+++ b/fastdeploy/model_executor/layers/attention/flash_attn_backend.py
@@ -110,7 +110,7 @@ class FlashAttentionBackend(AttentionBackend):
         self.kv_num_heads = kv_num_heads
         self.num_heads = num_heads
         self.head_dim = fd_config.model_config.head_dim
-        self.hidden_size = fd_config.model_config.hidden_size
+        self.hidden_size = self.num_heads * self.head_dim
         self.block_size = fd_config.parallel_config.block_size
         self.num_layers: int = fd_config.model_config.num_hidden_layers
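
The one-line change swaps the full-model hidden width for the width the backend actually handles: num_heads here is the head count handed to this backend instance, so num_heads * head_dim is the correct size whenever that differs from model_config.hidden_size (for example, when heads are sharded across tensor-parallel ranks). A minimal sketch of the distinction; the ModelConfig stand-in and all numbers below are hypothetical, not FastDeploy's real FDConfig API:

    # Sketch: why num_heads * head_dim, not model_config.hidden_size, is the
    # per-backend width. ModelConfig here is a stand-in, not FastDeploy's API.
    from dataclasses import dataclass

    @dataclass
    class ModelConfig:
        hidden_size: int = 4096  # full-model hidden width
        head_dim: int = 128

    cfg = ModelConfig()
    tp_degree = 4
    total_heads = cfg.hidden_size // cfg.head_dim  # 32 heads model-wide
    num_heads = total_heads // tp_degree           # 8 heads on this rank

    old_hidden_size = cfg.hidden_size           # 4096: full width, too large here
    new_hidden_size = num_heads * cfg.head_dim  # 1024: width this rank produces

    assert old_hidden_size != new_hidden_size
    # A per-rank attention output has shape [num_tokens, num_heads * head_dim];
    # reshaping it against the full hidden_size (4096) would raise a shape error.

Deriving the width from the heads the backend was given is also the safer invariant in general: any model where num_heads * head_dim is not equal to hidden_size would hit the same mismatch even without tensor parallelism.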