From d37331fc71f594243c84ed960c070b0dee7d99ac Mon Sep 17 00:00:00 2001
From: Yuan Xiaolan <845594810@qq.com>
Date: Thu, 28 Aug 2025 11:42:23 +0800
Subject: [PATCH] fix w4afp8_gemm_scale_permute import error on A100 (#3611)

---
 .../model_executor/layers/moe/fused_moe_cutlass_backend.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py
index fa08b4a78..be38b56cb 100644
--- a/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py
+++ b/fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py
@@ -31,8 +31,12 @@ if current_platform.is_cuda():
         moe_expert_dispatch,
         moe_expert_reduce,
         noaux_tc,
-        w4afp8_gemm_scale_permute,
     )
+
+    try:
+        from fastdeploy.model_executor.ops.gpu import w4afp8_gemm_scale_permute
+    except:
+        logger.warning("import w4afp8_gemm_scale_permute Failed!")
 elif current_platform.is_iluvatar():
     from fastdeploy.model_executor.ops.iluvatar import (
         moe_expert_dispatch,