[Sync] Update to latest code (#2679)

* [Sync] Update to latest code

* Add new code files

* Add new code files

* update code

* Try to fix build.sh

* Try to fix build.sh

* Update code

* Update requirements.txt

* Update code

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
This commit is contained in:
Jiang-Jia-Jun
2025-07-03 15:43:53 +08:00
committed by GitHub
parent d222248d00
commit 05c670e593
95 changed files with 9916 additions and 1312 deletions

View File

@@ -46,7 +46,7 @@ class CUDAPlatform(Platform):
return False
@classmethod
def get_attention_backend_cls(cls, selected_backend):
def get_attention_backend_cls(cls, selected_backend: _Backend):
"""
get_attention_backend_cls
"""
@@ -60,5 +60,13 @@ class CUDAPlatform(Platform):
return (
"fastdeploy.model_executor.layers.attention.AppendAttentionBackend"
)
elif selected_backend == _Backend.MLA_ATTN:
logger.info("Using MLA ATTN backend.")
return (
"fastdeploy.model_executor.layers.attention.MLAAttentionBackend"
)
else:
logger.warning("Other backends are not supported for now.")
raise ValueError(
"Invalid attention backend you specified.\n"
"Now only support [NATIVE_ATTN, MLA_ATTN, APPEND_ATTN] in cuda place."
)