mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-11-02 20:54:03 +08:00
support c4 attn && fix cache
This commit is contained in:
@@ -211,6 +211,7 @@ class GCUFlashAttnBackend(AttentionBackend):
|
||||
def get_kv_cache_shape(
|
||||
self,
|
||||
max_num_blocks: int,
|
||||
kv_cache_quant_type: str = None,
|
||||
):
|
||||
"""
|
||||
Calculate kv cache shape
|
||||
|
||||
@@ -222,6 +222,7 @@ class GCUMemEfficientAttnBackend(AttentionBackend):
|
||||
def get_kv_cache_shape(
|
||||
self,
|
||||
max_num_blocks: int,
|
||||
kv_cache_quant_type: str = None,
|
||||
):
|
||||
"""
|
||||
Calculate kv cache shape
|
||||
|
||||
Reference in New Issue
Block a user