From 8f2b85362dc704dcf94ae8bddd6ece62b1de68f3 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 5 Dec 2025 17:49:40 +0800 Subject: [PATCH] [XPU] support moe_expert_ffn TGEMM selection (#5375) --- custom_ops/xpu_ops/src/ops/moe_expert_ffn.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/custom_ops/xpu_ops/src/ops/moe_expert_ffn.cc b/custom_ops/xpu_ops/src/ops/moe_expert_ffn.cc index 458688214..9c1c0b194 100644 --- a/custom_ops/xpu_ops/src/ops/moe_expert_ffn.cc +++ b/custom_ops/xpu_ops/src/ops/moe_expert_ffn.cc @@ -393,7 +393,19 @@ std::vector MoeExpertFFNKernel( smooth_data, \ hadamard_blocksize) if (quant_method == "weight_only_int8") { - FFN_IMPL(XPU_TX1, XPU_TX2, int8_t, float); + static const char* xft_moe_fc_wint8_tgemm = + std::getenv("XFT_MOE_FC_WINT8_TGEMM"); + if (xft_moe_fc_wint8_tgemm != nullptr) { + if (std::string(xft_moe_fc_wint8_tgemm) == "INT8") { + FFN_IMPL(XPU_TX1, XPU_TX2, int8_t, int8_wo_t); + } else if (std::string(xft_moe_fc_wint8_tgemm) == "FLOAT16") { + FFN_IMPL(XPU_TX1, XPU_TX2, int8_t, float16); + } else { + FFN_IMPL(XPU_TX1, XPU_TX2, int8_t, float); + } + } else { + FFN_IMPL(XPU_TX1, XPU_TX2, int8_t, float); + } } else if (quant_method == "weight_only_int4") { FFN_IMPL(XPU_TX1, XPU_TX2, int4_t, int4_wo_int15); } else if (quant_method == "w4a8") {