diff --git a/fastdeploy/worker/hpu_model_runner.py b/fastdeploy/worker/hpu_model_runner.py index 16a781904..5811a1516 100644 --- a/fastdeploy/worker/hpu_model_runner.py +++ b/fastdeploy/worker/hpu_model_runner.py @@ -210,6 +210,7 @@ def rebuild_padding_v3_1( return output_data +from fastdeploy.model_executor.forward_meta import ForwardMeta from fastdeploy.model_executor.layers.linear import QKVParallelLinear, RowParallelLinear from fastdeploy.model_executor.ops.intel_hpu import fused_mlp @@ -259,10 +260,21 @@ def fused_self_atten_forward( return atten_out -def fused_mlp_forward(self, x): - """ """ +def fused_mlp_forward( + self, + hidden_states: paddle.Tensor, + forward_meta: Optional[ForwardMeta] = None, +): + """ + The forward function for the MLP (Multi-Layer Perceptron) layer. + Args: + hidden_states (paddle.Tensor): The input tensor to the MLP layer. + forward_meta (Optional[ForwardMeta]): Optional metadata for the forward pass. + Returns: + paddle.Tensor: The output tensor after applying the MLP layer and (optionally) all-reduce. + """ out = fused_mlp( - x, + hidden_states, self.up_gate_proj.weight, None, self.down_proj.weight,