Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Intel HPU] fix bug about RP 5138 (#5380)
@@ -210,6 +210,7 @@ def rebuild_padding_v3_1(
     return output_data


+from fastdeploy.model_executor.forward_meta import ForwardMeta
 from fastdeploy.model_executor.layers.linear import QKVParallelLinear, RowParallelLinear
 from fastdeploy.model_executor.ops.intel_hpu import fused_mlp

@@ -259,10 +260,21 @@ def fused_self_atten_forward(
     return atten_out


-def fused_mlp_forward(self, x):
-    """ """
+def fused_mlp_forward(
+    self,
+    hidden_states: paddle.Tensor,
+    forward_meta: Optional[ForwardMeta] = None,
+):
+    """
+    The forward function for the MLP (Multi-Layer Perceptron) layer.
+    Args:
+        hidden_states (paddle.Tensor): The input tensor to the MLP layer.
+        forward_meta (Optional[ForwardMeta]): Optional metadata for the forward pass.
+    Returns:
+        paddle.Tensor: The output tensor after applying the MLP layer and (optionally) all-reduce.
+    """
     out = fused_mlp(
-        x,
+        hidden_states,
         self.up_gate_proj.weight,
         None,
         self.down_proj.weight,
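For readers unfamiliar with this code path, the sketch below illustrates what the signature change means at the call site. It is purely illustrative: only the new fused_mlp_forward signature is taken from the diff; DummyMLP, its weight tensors, and the reference SwiGLU computation are hypothetical stand-ins for the Intel HPU fused_mlp op, whose actual implementation is not shown in this commit.

# Illustrative sketch, not part of the commit. The reference SwiGLU MLP below is an
# assumption about what the fused_mlp HPU op computes; DummyMLP is a hypothetical
# stand-in for the patched layer.
import paddle
import paddle.nn.functional as F


def reference_swiglu_mlp(hidden_states, up_gate_weight, down_weight):
    # up_gate_weight packs the gate and up projections in one matrix,
    # mirroring how up_gate_proj.weight is passed in the diff.
    gate_up = paddle.matmul(hidden_states, up_gate_weight)
    gate, up = paddle.chunk(gate_up, chunks=2, axis=-1)
    return paddle.matmul(F.silu(gate) * up, down_weight)


class DummyMLP:
    def __init__(self, hidden_size=64, intermediate_size=128):
        self.up_gate_weight = paddle.randn([hidden_size, 2 * intermediate_size])
        self.down_weight = paddle.randn([intermediate_size, hidden_size])

    # New-style signature from the diff: hidden_states plus an optional forward_meta.
    def fused_mlp_forward(self, hidden_states, forward_meta=None):
        return reference_swiglu_mlp(hidden_states, self.up_gate_weight, self.down_weight)


mlp = DummyMLP()
x = paddle.randn([2, 64])
# Callers that previously passed a bare `x` can now also thread ForwardMeta through.
out = mlp.fused_mlp_forward(x, forward_meta=None)
print(out.shape)  # [2, 64]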