MoE: default to Triton's blockwise FP8 in the TP case (#3678)

This commit is contained in:
周周周
2025-08-29 11:07:30 +08:00
committed by GitHub
parent b6edd15d55
commit 17b414c2df
7 changed files with 5 additions and 10 deletions

View File

@@ -61,7 +61,7 @@ class BlockWiseFP8Config(QuantConfigBase):
Get quantization method.
"""
if isinstance(layer, FusedMoE):
if self.use_deep_gemm:
if layer.ep_size > 1 or self.use_deep_gemm:
from fastdeploy.model_executor.layers.moe.fused_moe_deepgemm_backend import (
DeepGemmFusedMoeMethod,
)