This commit is contained in:
Ryan
2025-11-20 19:25:56 +08:00
committed by GitHub
parent c3994750b1
commit 0857099191

View File

@@ -19,6 +19,7 @@ from paddle import nn
import fastdeploy
from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce
from fastdeploy.model_executor.ops.gpu import moe_expert_dispatch, moe_expert_reduce
from fastdeploy.utils import ceil_div
from ..quantization.quant_base import QuantMethodBase
@@ -266,7 +267,6 @@ class CutlassWint2FusedMoeMethod(Wint2MoeMethod):
Use the Wint2 Cutlass fused MoE path to compute the Fused MoE output.
"""
gate_out = gate(x.cast("float32"))
from fastdeploy.model_executor.ops.gpu import moe_expert_dispatch
(
permute_input,
@@ -306,8 +306,6 @@ class CutlassWint2FusedMoeMethod(Wint2MoeMethod):
False,
)
from fastdeploy.model_executor.ops.gpu import moe_expert_reduce
fused_moe_out = moe_expert_reduce(
ffn_out,
topk_weights,