mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
MoE Default use triton's blockwise fp8 in TP Case (#3678)
This commit is contained in:
@@ -65,7 +65,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
# Whether to use fastsafetensor load weight (0 or 1)
|
||||
"FD_USE_FASTSAFETENSOR": lambda: bool(int(os.getenv("FD_USE_FASTSAFETENSOR", "0"))),
|
||||
# Whether to use DeepGemm for FP8 blockwise MoE.
|
||||
"FD_USE_DEEP_GEMM": lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "1"))),
|
||||
"FD_USE_DEEP_GEMM": lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))),
|
||||
# Whether to use aggregate send.
|
||||
"FD_USE_AGGREGATE_SEND": lambda: bool(int(os.getenv("FD_USE_AGGREGATE_SEND", "0"))),
|
||||
# Whether to open Trace.
|
||||
|
Reference in New Issue
Block a user