Mirror of https://github.com/PaddlePaddle/FastDeploy.git — synced 2025-10-07 01:22:59 +08:00.
[Sync] Update to latest code (#2679)
* [Sync] Update to latest code * Add new code files * Add new code files * update code * Try to fix build.sh * Try to fix build.sh * Update code * Update requirements.txt * Update code --------- Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
This commit is contained in:
@@ -46,7 +46,7 @@ class CUDAPlatform(Platform):
         return False

     @classmethod
-    def get_attention_backend_cls(cls, selected_backend):
+    def get_attention_backend_cls(cls, selected_backend: _Backend):
         """
         get_attention_backend_cls
         """
@@ -60,5 +60,13 @@ class CUDAPlatform(Platform):
             return (
                 "fastdeploy.model_executor.layers.attention.AppendAttentionBackend"
             )
+        elif selected_backend == _Backend.MLA_ATTN:
+            logger.info("Using MLA ATTN backend.")
+            return (
+                "fastdeploy.model_executor.layers.attention.MLAAttentionBackend"
+            )
         else:
             logger.warning("Other backends are not supported for now.")
             raise ValueError(
                 "Invalid attention backend you specified.\n"
                 "Now only support [NATIVE_ATTN, MLA_ATTN, APPEND_ATTN] in cuda place."
             )

(NOTE: the original +/- markers of the second hunk were lost in extraction; the elif/MLA_ATTN block is marked as added based on the hunk's +8-line delta and the commit message — confirm against the upstream commit.)
Reference in New Issue
Block a user