[Feature] block_wise_fp8 support triton_moe_backend (#2767)

This commit is contained in:
chen
2025-07-09 19:22:47 +08:00
committed by GitHub
parent e3768c5a83
commit 888780ffde
5 changed files with 248 additions and 10 deletions

View File

@@ -67,6 +67,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Switch from standalone PD to centralized inference (0 or 1)
"FD_PD_CHANGEABLE":
lambda: os.getenv("FD_PD_CHANGEABLE", "1"),
# Whether to use DeepGemm for FP8 blockwise MoE (set to 0 to disable; enabled by default, 1).
"FD_USE_DEEP_GEMM":
lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "1"))),
}