[Feature] block_wise_fp8 support triton_moe_backend (#2767)

2025-12-24 13:28:13 +08:00 · 2025-07-09 19:22:47 +08:00
parent e3768c5a83
commit 888780ffde
5 changed files with 248 additions and 10 deletions
--- a/docs/zh/usage/environment_variables.md
+++ b/docs/zh/usage/environment_variables.md
@@ -1,5 +1,6 @@
 # FastDeploy 环境变量说明
 FastDeploy 的环境变量保存在了代码库根目录下 fastdeploy/envs.py 文件中，以下是其对应的中文版说明：
+
 ```python
 environment_variables: dict[str, Callable[[], Any]] = {
    # 构建 FastDeploy 时使用的 CUDA 架构版本，这是一个字符串列表，例如[80,90]
@@ -65,6 +66,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # 是否从单机 PD 分离转换为集中式推理
    "FD_PD_CHANGEABLE":
    lambda: os.getenv("FD_PD_CHANGEABLE", "1"),
-  
+
+    # 是否使用 DeepGemm 后端的 FP8 blockwise MoE
+    "FD_USE_DEEP_GEMM":
+    lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "1"))),
+
 }
-```
+```