mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[MetaxGPU] Support FastDeploy on metax gpu (#3241)
* [MetaxGPU] Support FastDeploy on metax gpu * Update metax_worker.py 1. change worker log; 2. remove custom allreduce, adapt it later; 3. remove cuda graph; * Update __init__.py 1. remove metax's key word comment * Update __init__.py 1. remove metax's key word comment; 2. add fused_moe_kernel_paddle import --------- Co-authored-by: yongqiangma <xing.wo@163.com>
This commit is contained in:
@@ -86,6 +86,15 @@ class AttentionBackend(ABC):
|
||||
layer,
|
||||
forward_meta,
|
||||
)
|
||||
elif forward_meta.forward_mode.is_native():
|
||||
return self.forward_native_backend(
|
||||
q,
|
||||
k,
|
||||
v,
|
||||
qkv,
|
||||
layer,
|
||||
forward_meta,
|
||||
)
|
||||
else:
|
||||
return self.forward_extend(
|
||||
q,
|
||||
@@ -139,3 +148,15 @@ class AttentionBackend(ABC):
|
||||
) -> paddle.Tensor:
|
||||
"""Run a forward for extend."""
|
||||
raise NotImplementedError
|
||||
|
||||
def forward_native_backend(
    self,
    q: paddle.Tensor,
    k: paddle.Tensor,
    v: paddle.Tensor,
    qkv: paddle.Tensor,
    layer: paddle.nn.Layer,
    forward_meta: ForwardMeta,
) -> paddle.Tensor:
    """Run the attention forward pass for the native forward mode.

    The dispatcher shown in this diff calls this hook when
    ``forward_meta.forward_mode.is_native()`` is true and forwards
    ``q, k, v, qkv, layer, forward_meta`` to it unchanged. This base
    version provides no computation; a backend that supports the native
    mode is expected to override it.

    Args:
        q: Presumably the query tensor -- confirm against callers.
        k: Presumably the key tensor -- confirm against callers.
        v: Presumably the value tensor -- confirm against callers.
        qkv: Presumably the packed query/key/value tensor -- confirm
            against callers.
        layer: The layer this forward pass is run for.
        forward_meta: Forward metadata; its ``forward_mode`` is what
            selected this code path.

    Returns:
        A ``paddle.Tensor`` in overriding implementations; this base
        version never returns.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError()
|
||||
|
Reference in New Issue
Block a user