[MetaxGPU] Support FastDeploy on metax gpu (#3241)

* [MetaxGPU] Support FastDeploy on metax gpu

* Update metax_worker.py

1. change worker log;
2. remove custom allreduce, adapt it later;
3. remove cuda graph;

* Update __init__.py

1. remove metax's key word comment

* Update __init__.py

1. remove metax's key word comment;
2. add fused_moe_kernel_paddle import

---------

Co-authored-by: yongqiangma <xing.wo@163.com>
This commit is contained in:
Kane2011
2025-08-13 11:11:54 +08:00
committed by GitHub
parent ed6bff215a
commit b4fef2cf29
29 changed files with 3224 additions and 11 deletions

View File

@@ -86,6 +86,15 @@ class AttentionBackend(ABC):
layer,
forward_meta,
)
elif forward_meta.forward_mode.is_native():
return self.forward_native_backend(
q,
k,
v,
qkv,
layer,
forward_meta,
)
else:
return self.forward_extend(
q,
@@ -139,3 +148,15 @@ class AttentionBackend(ABC):
) -> paddle.Tensor:
"""Run a forward for extend."""
raise NotImplementedError
def forward_native_backend(
    self,
    q: paddle.Tensor,
    k: paddle.Tensor,
    v: paddle.Tensor,
    qkv: paddle.Tensor,
    layer: paddle.nn.Layer,
    forward_meta: ForwardMeta,
) -> paddle.Tensor:
    """Run the attention forward pass for the native execution mode.

    Dispatched from the backend's main forward entry point when
    ``forward_meta.forward_mode.is_native()`` is true. This base
    implementation is a stub: concrete attention backends that support
    the native forward mode must override it.

    Args:
        q: Query tensor.
        k: Key tensor.
        v: Value tensor.
        qkv: Fused tensor passed alongside the separate q/k/v —
            presumably a packed QKV projection; confirm layout against
            the caller.
        layer: The attention layer invoking this backend.
        forward_meta: Metadata describing the current forward pass.

    Returns:
        The attention output tensor.

    Raises:
        NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError