Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)
【Fix Bug】 修复 fa3 支持集中式bug (#3235)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* fix fa3 集中式bug
* 增加qknorm参数
This commit is contained in:
@@ -344,7 +344,7 @@ class FlashAttentionBackend(AttentionBackend):
 forward_meta.decoder_batch_ids, # from buffer
 forward_meta.decoder_tile_ids_per_batch, # from buffer
 forward_meta.decoder_num_blocks_cpu,
-forward_meta.max_len_tensor_cpu,
+metadata.max_len_tensor_cpu_decoder,
 metadata.max_len_kv,
 metadata.rotary_embs,
 forward_meta.attn_mask,
@@ -359,6 +359,9 @@ class FlashAttentionBackend(AttentionBackend):
 layer.linear_shift,
 layer.linear_smooth,
 metadata.kv_signal_data_list[layer.layer_id],
+getattr(layer, "q_norm_weight", None),
+getattr(layer, "k_norm_weight", None),
+getattr(layer, "rms_norm_eps", 1e-6),
 metadata._fuse_kernel_compute_dtype,
 getattr(layer, "cache_quant_type_str", "none"),
 layer.use_neox_rotary_style,
Reference in New Issue
Block a user