mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
fix w4afp8_gemm_scale_permute import error on A100 (#3611)
This commit is contained in:
@@ -31,8 +31,12 @@ if current_platform.is_cuda():
|
|||||||
moe_expert_dispatch,
|
moe_expert_dispatch,
|
||||||
moe_expert_reduce,
|
moe_expert_reduce,
|
||||||
noaux_tc,
|
noaux_tc,
|
||||||
w4afp8_gemm_scale_permute,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from fastdeploy.model_executor.ops.gpu import w4afp8_gemm_scale_permute
|
||||||
|
except:
|
||||||
|
logger.warning("import w4afp8_gemm_scale_permute Failed!")
|
||||||
elif current_platform.is_iluvatar():
|
elif current_platform.is_iluvatar():
|
||||||
from fastdeploy.model_executor.ops.iluvatar import (
|
from fastdeploy.model_executor.ops.iluvatar import (
|
||||||
moe_expert_dispatch,
|
moe_expert_dispatch,
|
||||||
|
Reference in New Issue
Block a user