[Sync] Update to latest code (#2679)

* [Sync] Update to latest code

* Add new code files

* Add new code files

* Update code

* Try to fix build.sh

* Try to fix build.sh

* Update code

* Update requirements.txt

* Update code

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
This commit is contained in:
Jiang-Jia-Jun
2025-07-03 15:43:53 +08:00
committed by GitHub
parent d222248d00
commit 05c670e593
95 changed files with 9916 additions and 1312 deletions

View File

@@ -583,15 +583,14 @@ class XPUModelRunner(ModelRunnerBase):
head_dim = self.model_config.head_dim
# Get the attention backend
-attn_cls = get_attention_backend(
-self.parallel_config.attention_backend)
+attn_cls = get_attention_backend()
attn_backend = attn_cls(self.fd_config,
kv_num_heads=self.model_config.kv_num_heads,
num_heads=num_heads,
head_dim=head_dim)
if attn_backend is None:
raise NotImplementedError(
-f"{ self.parallel_config.attention_backend} attention backend is not support by XPUModelRunner"
+"Attention backend which you chose is not support by GPUModelRunner"
)
self.attn_backends.append(attn_backend)