mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[Executor] Move forward_meta.py to fastdeploy/model_executor (#2774)
* Use PEP 563 in attention.py and fix conflict
* merge commit
* Change what was left out last time
This commit is contained in:
@@ -24,15 +24,12 @@ import paddle
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from paddle._typing.dtype_like import _DTypeLiteral
|
||||
|
||||
from fastdeploy.config import FDConfig
|
||||
from fastdeploy.model_executor.layers.attention.attention import Attention
|
||||
from fastdeploy.model_executor.layers.attention.base_attention_backend import (
|
||||
AttentionBackend, AttentionMetadata)
|
||||
from fastdeploy.worker.forward_meta import ForwardMeta, ForwardMode
|
||||
if TYPE_CHECKING:
|
||||
from fastdeploy.model_executor.forward_meta import ForwardMeta, ForwardMode
|
||||
|
||||
from fastdeploy.model_executor.ops.gcu import (fused_rotary_embedding,
|
||||
mem_efficient_attention,
|
||||
@@ -47,7 +44,7 @@ class GCUFlashAttnMetadata(AttentionMetadata):
|
||||
"""
|
||||
forward_mode: ForwardMode = ForwardMode.MIXED
|
||||
|
||||
_dtype: _DTypeLiteral = paddle.bfloat16
|
||||
_dtype: paddle.dtype = paddle.bfloat16
|
||||
|
||||
seq_lens_encoder: Optional[paddle.Tensor] = None
|
||||
seq_lens_decoder: Optional[paddle.Tensor] = None
|
||||
|
@@ -25,28 +25,26 @@ import paddle
|
||||
import numpy as np
|
||||
import math
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from paddle._typing.dtype_like import _DTypeLiteral
|
||||
|
||||
from fastdeploy.config import FDConfig
|
||||
from fastdeploy.model_executor.layers.attention.attention import Attention
|
||||
from fastdeploy.model_executor.layers.attention.base_attention_backend import (
|
||||
AttentionBackend, AttentionMetadata)
|
||||
from fastdeploy.worker.forward_meta import ForwardMeta, ForwardMode
|
||||
|
||||
from fastdeploy.model_executor.ops.gcu import (fused_rotary_embedding,
|
||||
mem_efficient_attention,
|
||||
flash_attn_var_len)
|
||||
from paddleformers.utils.log import logger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fastdeploy.model_executor.forward_meta import ForwardMeta, ForwardMode
|
||||
|
||||
@dataclass
|
||||
class GCUMemEfficientAttnMetadata(AttentionMetadata):
|
||||
"""
|
||||
GCUMemEfficientAttnMetadata
|
||||
"""
|
||||
forward_mode: ForwardMode = ForwardMode.MIXED
|
||||
_dtype: _DTypeLiteral = paddle.bfloat16
|
||||
_dtype: paddle.dtype = paddle.bfloat16
|
||||
|
||||
seq_lens_encoder: Optional[paddle.Tensor] = None
|
||||
seq_lens_decoder: Optional[paddle.Tensor] = None
|
||||
|
Reference in New Issue
Block a user