mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 16:22:57 +08:00
Fix w4a8 scale load (#3334)
* fix_eplb
* fix eplb part3
* support_fp8_rope3d
* fix w4a8 scale
This commit is contained in:
@@ -361,7 +361,7 @@ class CutlassW4A8MoEMethod(CutlassMoEMethod):
|
|||||||
if layer.ep_size > 1:
|
if layer.ep_size > 1:
|
||||||
for expert_idx in ep_rank_to_expert_id_list:
|
for expert_idx in ep_rank_to_expert_id_list:
|
||||||
scale_tensor = get_tensor(
|
scale_tensor = get_tensor(
|
||||||
get_tensor(
|
(
|
||||||
state_dict[up_gate_proj_expert_in_scale_key.format(expert_idx)]
|
state_dict[up_gate_proj_expert_in_scale_key.format(expert_idx)]
|
||||||
if up_gate_proj_expert_in_scale_key.format(expert_idx) in state_dict
|
if up_gate_proj_expert_in_scale_key.format(expert_idx) in state_dict
|
||||||
else up_gate_proj_expert_in_scale_key.format(expert_idx)
|
else up_gate_proj_expert_in_scale_key.format(expert_idx)
|
||||||
|
Reference in New Issue
Block a user