Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-04 08:16:42 +08:00)
Fix w4a8 scale load (#3334)
* fix_eplb
* fix eplb part3
* support_fp8_rope3d
* fix w4a8 scale
This commit is contained in:
@@ -361,7 +361,7 @@ class CutlassW4A8MoEMethod(CutlassMoEMethod):
|
||||
if layer.ep_size > 1:
|
||||
for expert_idx in ep_rank_to_expert_id_list:
|
||||
scale_tensor = get_tensor(
|
||||
get_tensor(
|
||||
(
|
||||
state_dict[up_gate_proj_expert_in_scale_key.format(expert_idx)]
|
||||
if up_gate_proj_expert_in_scale_key.format(expert_idx) in state_dict
|
||||
else up_gate_proj_expert_in_scale_key.format(expert_idx)
|
||||
|
Reference in New Issue
Block a user