mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-27 18:41:02 +08:00
[feat] support fa3 backend for pd disaggregated (#2695)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* support fa3 backend run in pd disaggregated
* support fa3 backend run in pd disaggregated
* support fa3 backend run in pd disaggregated
* support fa3 backend run in pd disaggregated
* delete use_fast_ffn
This commit is contained in:
@@ -12,16 +12,16 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from .attention import Attention
|
||||
from .append_attn_backend import AppendAttentionBackend
|
||||
from .attention_selecter import get_attention_backend
|
||||
from .base_attention_backend import AttentionBackend
|
||||
from .flash_attn_backend import FlashAttentionBackend
|
||||
from .mla_attention_backend import MLAAttentionBackend
|
||||
from .native_paddle_backend import PaddleNativeAttnBackend
|
||||
from .xpu_attn_backend import XPUAttentionBackend
|
||||
|
||||
__all__ = [
|
||||
"Attention", "AttentionBackend", "PaddleNativeAttnBackend",
|
||||
"AttentionBackend", "PaddleNativeAttnBackend",
|
||||
"get_attention_backend", "AppendAttentionBackend", "XPUAttentionBackend",
|
||||
"MLAAttentionBackend"
|
||||
"MLAAttentionBackend", "FlashAttentionBackend"
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user