mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
mv import (#5146)
This commit is contained in:
@@ -19,6 +19,7 @@ from paddle import nn
|
||||
|
||||
import fastdeploy
|
||||
from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce
|
||||
from fastdeploy.model_executor.ops.gpu import moe_expert_dispatch, moe_expert_reduce
|
||||
from fastdeploy.utils import ceil_div
|
||||
|
||||
from ..quantization.quant_base import QuantMethodBase
|
||||
@@ -266,7 +267,6 @@ class CutlassWint2FusedMoeMethod(Wint2MoeMethod):
|
||||
Use Wint2 Triton Fusedmoe compute Fused MoE.
|
||||
"""
|
||||
gate_out = gate(x.cast("float32"))
|
||||
from fastdeploy.model_executor.ops.gpu import moe_expert_dispatch
|
||||
|
||||
(
|
||||
permute_input,
|
||||
@@ -306,8 +306,6 @@ class CutlassWint2FusedMoeMethod(Wint2MoeMethod):
|
||||
False,
|
||||
)
|
||||
|
||||
from fastdeploy.model_executor.ops.gpu import moe_expert_reduce
|
||||
|
||||
fused_moe_out = moe_expert_reduce(
|
||||
ffn_out,
|
||||
topk_weights,
|
||||
|
||||
Reference in New Issue
Block a user