support W4A8 EPLB (#3075)

This commit is contained in:
Yuan Xiaolan
2025-07-30 14:34:12 +08:00
committed by GitHub
parent 159767717d
commit 35935da9e5
5 changed files with 14 additions and 11 deletions

View File

@@ -276,7 +276,7 @@ class GCUWeightOnlyMoEMethod(GCUFusedMoeMethod):
up_gate_proj_expert_weight_scale_key = layer.weight_key_map.get("up_gate_proj_expert_weight_scale_key", None)
down_proj_expert_weight_scale_key = layer.weight_key_map.get("down_proj_expert_weight_scale_key", None)
-up_gate_proj_weights, down_proj_weights, _ = layer.load_experts_weight(
+up_gate_proj_weights, down_proj_weights, _, _ = layer.load_experts_weight(
state_dict,
up_gate_proj_expert_weight_key,
down_proj_expert_weight_key,