mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 09:07:10 +08:00
MoE: use Triton's blockwise FP8 by default in the TP case (#3678)
This commit is contained in:
@@ -174,7 +174,7 @@ model_param_map = {
     {
         "quant_type": "block_wise_fp8",
         "backend": "triton",
-        "env": {"FD_USE_DEEP_GEMM": "0", "DG_NVCC_OVERRIDE_CPP_STANDARD": "17"},
+        "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17"},
     },
     {"quant_type": "block_wise_fp8", "backend": "deepgemm", "env": {"DG_NVCC_OVERRIDE_CPP_STANDARD": "17"}},
 ],
Reference in New Issue
Block a user