mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 00:33:03 +08:00
remove some code in ep.py (#2947)
This commit is contained in:
@@ -361,22 +361,6 @@ class EPDecoderRunner(EPRunner):
|
|||||||
return recv_hidden_states, recv_expert_count, handle
|
return recv_hidden_states, recv_expert_count, handle
|
||||||
|
|
||||||
def combine(self, ffn_out, topk_idx, topk_weights, handle):
|
def combine(self, ffn_out, topk_idx, topk_weights, handle):
|
||||||
# TODO(@wufeisheng): Delete them when deepep in PaddlePaddle is fixed
|
|
||||||
(
|
|
||||||
src_info,
|
|
||||||
layout_range,
|
|
||||||
num_max_dispatch_tokens_per_rank,
|
|
||||||
num_experts,
|
|
||||||
) = handle
|
|
||||||
|
|
||||||
handle = (
|
|
||||||
src_info,
|
|
||||||
layout_range,
|
|
||||||
num_max_dispatch_tokens_per_rank,
|
|
||||||
None,
|
|
||||||
num_experts,
|
|
||||||
)
|
|
||||||
|
|
||||||
combined_hidden_states, combine_hook = self.ep_engine.low_latency_combine(
|
combined_hidden_states, combine_hook = self.ep_engine.low_latency_combine(
|
||||||
ffn_out, topk_idx, topk_weights, handle
|
ffn_out, topk_idx, topk_weights, handle
|
||||||
)
|
)
|
||||||
|
Reference in New Issue
Block a user