[fix] w4a8 model loading and hadamard config (#3013)

2025-09-30 22:32:30 +08:00 · 2025-07-28 18:17:59 +08:00
parent c8bf8b3913
commit b1d787a272
4 changed files with 32 additions and 4 deletions
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -1000,7 +1000,10 @@ class LLMEngine:
            "FLAGS_use_append_attn": 1,
            "NCCL_ALGO": "Ring",
            "FLAGS_max_partition_size": int(os.getenv("FLAGS_max_partition_size", 32768)),
-            "FLAGS_hardamard_moe_block_size": 128,
+            "FLAGS_hardamard_moe_block_size": int(os.getenv("FLAGS_hardamard_moe_block_size", 128)),
+            "FLAGS_hardamard_use_diagonal_block_matrix": int(
+                os.getenv("FLAGS_hardamard_use_diagonal_block_matrix", 0)
+            ),
        }
        # environment variables needed by Dy2St
        variables.update(