[fix] w4a8 model loading and hadamard config (#3013)

This commit is contained in:
Yuan Xiaolan
2025-07-28 18:17:59 +08:00
committed by GitHub
parent c8bf8b3913
commit b1d787a272
4 changed files with 32 additions and 4 deletions

View File

@@ -1000,7 +1000,10 @@ class LLMEngine:
"FLAGS_use_append_attn": 1,
"NCCL_ALGO": "Ring",
"FLAGS_max_partition_size": int(os.getenv("FLAGS_max_partition_size", 32768)),
"FLAGS_hardamard_moe_block_size": 128,
"FLAGS_hardamard_moe_block_size": int(os.getenv("FLAGS_hardamard_moe_block_size", 128)),
"FLAGS_hardamard_use_diagonal_block_matrix": int(
os.getenv("FLAGS_hardamard_use_diagonal_block_matrix", 0)
),
}
# environment variables needed by Dy2St
variables.update(