Sync v2.0 version of code to github repo

This commit is contained in:
Jiang-Jia-Jun
2025-06-29 23:29:37 +00:00
parent d151496038
commit 92c2cfa2e7
597 changed files with 78776 additions and 22905 deletions

View File

@@ -14,26 +14,20 @@
# limitations under the License.
"""
"""
attention backend selector
"""
from fastdeploy.model_executor.layers.attention.base_attention_backend import AttentionBackend
from fastdeploy.platforms import current_platform
from fastdeploy.utils import resolve_obj_from_strname
from functools import cache
from fastdeploy.platforms import _Backend
from fastdeploy.platforms import _Backend, current_platform
from fastdeploy.utils import resolve_obj_from_strname
def backend_name_to_enum(backend_name: str) -> _Backend:
    """Translate a backend name into the matching ``_Backend`` member.

    The lookup goes through ``_Backend.__members__`` with ``dict.get``, so a
    name that is not a member does not raise: ``None`` is returned instead,
    and callers have to cope with that case themselves.

    Args:
        backend_name: Member name to look up. Must not be ``None``.

    Returns:
        The ``_Backend`` member called ``backend_name``, or ``None`` when no
        member has that name.
    """
    assert backend_name is not None, "backend_name must not be None"
    return _Backend.__members__.get(backend_name)
@cache
def _get_attn_backend(
selected_backend
):
def _get_attn_backend(selected_backend: str) -> object:
"""_get_attn_backend """
if isinstance(selected_backend, str):
selected_backend = backend_name_to_enum(selected_backend)
@@ -46,10 +40,6 @@ def _get_attn_backend(
return resolve_obj_from_strname(attention_cls)
def get_attention_backend(selected_backend):
    """Select the attention backend for ``selected_backend``.

    Public entry point that forwards its argument unchanged to
    ``_get_attn_backend`` and returns whatever that helper returns.

    Args:
        selected_backend: Backend identifier handed straight to
            ``_get_attn_backend``.

    Returns:
        The result of ``_get_attn_backend(selected_backend)``.
    """
    return _get_attn_backend(selected_backend)