mv import (#5146)

2025-12-24 13:28:13 +08:00 · 2025-11-20 19:25:56 +08:00
parent c3994750b1
commit 0857099191
1 changed file with 1 addition and 3 deletions
--- a/fastdeploy/model_executor/layers/moe/fused_moe_wint2_backend.py
+++ b/fastdeploy/model_executor/layers/moe/fused_moe_wint2_backend.py
@@ -19,6 +19,7 @@ from paddle import nn

 import fastdeploy
 from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce
+from fastdeploy.model_executor.ops.gpu import moe_expert_dispatch, moe_expert_reduce
 from fastdeploy.utils import ceil_div

 from ..quantization.quant_base import QuantMethodBase
@@ -266,7 +267,6 @@ class CutlassWint2FusedMoeMethod(Wint2MoeMethod):
        Use Wint2 Triton Fusedmoe compute Fused MoE.
        """
        gate_out = gate(x.cast("float32"))
-        from fastdeploy.model_executor.ops.gpu import moe_expert_dispatch

        (
            permute_input,
@@ -306,8 +306,6 @@ class CutlassWint2FusedMoeMethod(Wint2MoeMethod):
            False,
        )

-        from fastdeploy.model_executor.ops.gpu import moe_expert_reduce
-
        fused_moe_out = moe_expert_reduce(
            ffn_out,
            topk_weights,