[Sync] Update to latest code (#2679)

* [Sync] Update to latest code
* Add new code files
* Add new code files
* update code
* Try to fix build.sh
* Try to fix build.sh
* Update code
* Update requirements.txt
* Update code

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
2025-10-07 01:22:59 +08:00 · 2025-07-03 15:43:53 +08:00
parent d222248d00
commit 05c670e593
95 changed files with 9916 additions and 1312 deletions
--- a/fastdeploy/platforms/cuda.py
+++ b/fastdeploy/platforms/cuda.py
@@ -46,7 +46,7 @@ class CUDAPlatform(Platform):
            return False

    @classmethod
-    def get_attention_backend_cls(cls, selected_backend):
+    def get_attention_backend_cls(cls, selected_backend: _Backend):
        """
        get_attention_backend_cls
        """
@@ -60,5 +60,13 @@ class CUDAPlatform(Platform):
            return (
                "fastdeploy.model_executor.layers.attention.AppendAttentionBackend"
            )
+        elif selected_backend == _Backend.MLA_ATTN:
+            logger.info("Using MLA ATTN backend.")
+            return (
+                "fastdeploy.model_executor.layers.attention.MLAAttentionBackend"
+            )
        else:
-            logger.warning("Other backends are not supported for now.")
+            raise ValueError(
+                "Invalid attention backend you specified.\n"
+                "Now only support [NATIVE_ATTN, MLA_ATTN, APPEND_ATTN] in cuda place."
+            )