Revert "[Feature] block sparse attention (#3209)" (#3647)

This reverts commit 646a0c2fd8.
This commit is contained in:
Jiang-Jia-Jun
2025-08-27 17:35:04 +08:00
committed by GitHub
parent b2afdf4fc6
commit c694fa2879
31 changed files with 10 additions and 6507 deletions

View File

@@ -64,9 +64,6 @@ class CUDAPlatform(Platform):
elif selected_backend == _Backend.FLASH_ATTN:
logger.info("Using FLASH ATTN backend.")
return "fastdeploy.model_executor.layers.attention.FlashAttentionBackend"
-elif selected_backend == _Backend.MOBA_ATTN:
-logger.info("Using MOBA ATTN backend.")
-return "fastdeploy.model_executor.layers.attention.MobaAttentionBackend"
else:
raise ValueError(
"Invalid attention backend you specified.\n"