mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
MoE: default to Triton's blockwise FP8 in the TP case (#3678)
This commit is contained in:
@@ -61,7 +61,7 @@ class BlockWiseFP8Config(QuantConfigBase):
|
||||
Get quantization method.
|
||||
"""
|
||||
if isinstance(layer, FusedMoE):
|
||||
- if self.use_deep_gemm:
|
||||
+ if layer.ep_size > 1 or self.use_deep_gemm:
|
||||
from fastdeploy.model_executor.layers.moe.fused_moe_deepgemm_backend import (
|
||||
DeepGemmFusedMoeMethod,
|
||||
)
|
||||
|
Reference in New Issue
Block a user