[Feature] block_wise_fp8 support triton_moe_backend (#2767)

2025-09-26 20:41:53 +08:00 · 2025-07-09 19:22:47 +08:00
parent e3768c5a83
commit 888780ffde
5 changed files with 248 additions and 10 deletions
--- a/fastdeploy/envs.py
+++ b/fastdeploy/envs.py
@@ -97,6 +97,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # Whether to use fastsafetensor load weight (0 or 1)
    "FD_USE_FASTSAFETENSOR":
    lambda: os.getenv("FD_USE_FASTSAFETENSOR", "0"),
+
+    # Whether to use DeepGemm for FP8 blockwise MoE (0 or 1; defaults to 1).
+    "FD_USE_DEEP_GEMM":
+    lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "1"))),
 }