[Sync] Update to latest code (#2679)

* [Sync] Update to latest code
* Add new code files
* Add new code files
* update code
* Try to fix build.sh
* Try to fix build.sh
* Update code
* Update requirements.txt
* Update code

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
2025-10-04 08:16:42 +08:00 · 2025-07-03 15:43:53 +08:00
parent d222248d00
commit 05c670e593
95 changed files with 9916 additions and 1312 deletions
--- a/fastdeploy/worker/xpu_model_runner.py
+++ b/fastdeploy/worker/xpu_model_runner.py
@@ -583,15 +583,14 @@ class XPUModelRunner(ModelRunnerBase):
        head_dim = self.model_config.head_dim

        # Get the attention backend
-        attn_cls = get_attention_backend(
-            self.parallel_config.attention_backend)
+        attn_cls = get_attention_backend()
        attn_backend = attn_cls(self.fd_config,
                                kv_num_heads=self.model_config.kv_num_heads,
                                num_heads=num_heads,
                                head_dim=head_dim)
        if attn_backend is None:
            raise NotImplementedError(
-                f"{ self.parallel_config.attention_backend} attention backend is not support by XPUModelRunner"
+                "Attention backend which you chose is not support by GPUModelRunner"
            )
        self.attn_backends.append(attn_backend)