mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[BugFix] fix real_bsz in ep (#3366)
* Your commit message here
* fix ep
* delete cuda_graph
This commit is contained in:
@@ -450,7 +450,7 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM):
|
||||
self.fd_config.model_config.moe_layer_start_index,
|
||||
self.fd_config.model_config.num_hidden_layers,
|
||||
):
|
||||
- self.ernie.layers[i].mlp.expert(fake_hidden_states)
|
||||
+ self.ernie.layers[i].mlp.experts(fake_hidden_states, self.ernie.layers[i].mlp.gate)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
|
Reference in New Issue
Block a user