[Intel HPU] fix bug related to PR 5138 (#5380)

fmiao2372
2025-12-05 11:33:29 +08:00
committed by GitHub
parent 7b0b6e470a
commit ebe613ccc8


@@ -210,6 +210,7 @@ def rebuild_padding_v3_1(
     return output_data
+from fastdeploy.model_executor.forward_meta import ForwardMeta
 from fastdeploy.model_executor.layers.linear import QKVParallelLinear, RowParallelLinear
 from fastdeploy.model_executor.ops.intel_hpu import fused_mlp
@@ -259,10 +260,21 @@ def fused_self_atten_forward(
     return atten_out
-def fused_mlp_forward(self, x):
-    """ """
+def fused_mlp_forward(
+    self,
+    hidden_states: paddle.Tensor,
+    forward_meta: Optional[ForwardMeta] = None,
+):
+    """
+    The forward function for the MLP (Multi-Layer Perceptron) layer.
+
+    Args:
+        hidden_states (paddle.Tensor): The input tensor to the MLP layer.
+        forward_meta (Optional[ForwardMeta]): Optional metadata for the forward pass.
+
+    Returns:
+        paddle.Tensor: The output tensor after applying the MLP layer and (optionally) all-reduce.
+    """
     out = fused_mlp(
-        x,
+        hidden_states,
         self.up_gate_proj.weight,
         None,
         self.down_proj.weight,
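
For reviewers unfamiliar with the interface, here is a minimal sketch of why the signature change matters: the patched method now follows the generic `forward(hidden_states, forward_meta)` convention, so a model runner can pass `forward_meta` to every layer uniformly. Everything below (`TinyMLP`, `run_layer`, the non-fused SwiGLU reference body, and the `[gate, up]` split order) is a hypothetical illustration under assumed names, not code from this PR.

```python
# A minimal, self-contained sketch of the calling convention this diff aligns
# with. TinyMLP and run_layer are hypothetical names; the non-fused SwiGLU
# body and the [gate, up] split order are assumptions. The real HPU path
# computes the same dataflow in a single fused_mlp kernel call instead.
import paddle
import paddle.nn.functional as F


class TinyMLP(paddle.nn.Layer):
    """Stand-in layer mirroring the new fused_mlp_forward signature."""

    def __init__(self, hidden_size: int, intermediate_size: int):
        super().__init__()
        # Gate and up projections fused into one weight, as in up_gate_proj.
        self.up_gate_proj = paddle.nn.Linear(hidden_size, 2 * intermediate_size, bias_attr=False)
        self.down_proj = paddle.nn.Linear(intermediate_size, hidden_size, bias_attr=False)

    def forward(self, hidden_states: paddle.Tensor, forward_meta=None):
        # Reference (non-fused) SwiGLU: silu(gate) * up, then down projection.
        gate, up = paddle.chunk(self.up_gate_proj(hidden_states), 2, axis=-1)
        return self.down_proj(F.silu(gate) * up)


def run_layer(layer, hidden_states, forward_meta=None):
    # A generic runner can now hand forward_meta to every layer uniformly,
    # which the old (self, x) signature did not allow.
    return layer(hidden_states, forward_meta)


x = paddle.randn([2, 8, 64])
out = run_layer(TinyMLP(hidden_size=64, intermediate_size=128), x)
print(out.shape)  # [2, 8, 64]
```

In the diff itself, the same dataflow is handled by the single call `fused_mlp(hidden_states, self.up_gate_proj.weight, None, self.down_proj.weight)` on the Intel HPU.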