From 0857099191e0102f539298a5c1b2a7a437d8354a Mon Sep 17 00:00:00 2001 From: Ryan Date: Thu, 20 Nov 2025 19:25:56 +0800 Subject: [PATCH] mv import (#5146) --- .../model_executor/layers/moe/fused_moe_wint2_backend.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_wint2_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_wint2_backend.py index 78b38f31e..43e58a6f1 100644 --- a/fastdeploy/model_executor/layers/moe/fused_moe_wint2_backend.py +++ b/fastdeploy/model_executor/layers/moe/fused_moe_wint2_backend.py @@ -19,6 +19,7 @@ from paddle import nn import fastdeploy from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce +from fastdeploy.model_executor.ops.gpu import moe_expert_dispatch, moe_expert_reduce from fastdeploy.utils import ceil_div from ..quantization.quant_base import QuantMethodBase @@ -266,7 +267,6 @@ class CutlassWint2FusedMoeMethod(Wint2MoeMethod): Use Wint2 Triton Fusedmoe compute Fused MoE. """ gate_out = gate(x.cast("float32")) - from fastdeploy.model_executor.ops.gpu import moe_expert_dispatch ( permute_input, @@ -306,8 +306,6 @@ class CutlassWint2FusedMoeMethod(Wint2MoeMethod): False, ) - from fastdeploy.model_executor.ops.gpu import moe_expert_reduce - fused_moe_out = moe_expert_reduce( ffn_out, topk_weights,