From bfeb664ab8573c2ac2d257a0b0e9a6def180f9d0 Mon Sep 17 00:00:00 2001 From: bukejiyu <52310069+bukejiyu@users.noreply.github.com> Date: Thu, 24 Jul 2025 00:16:42 +0800 Subject: [PATCH] update (#2978) --- fastdeploy/model_executor/layers/moe/moe.py | 44 ++++++++++++++++--- fastdeploy/model_executor/layers/utils.py | 27 ++---------- .../model_executor/load_weight_utils.py | 14 ++++++ 3 files changed, 57 insertions(+), 28 deletions(-) diff --git a/fastdeploy/model_executor/layers/moe/moe.py b/fastdeploy/model_executor/layers/moe/moe.py index 6ea31642f..b573ccb0d 100644 --- a/fastdeploy/model_executor/layers/moe/moe.py +++ b/fastdeploy/model_executor/layers/moe/moe.py @@ -228,19 +228,53 @@ class FusedMoE(nn.Layer): if is_ffn_merged: for i in range(self.num_local_experts): expert_idx = self.expert_id_offset + i + down_proj_expert_weight_key_name = down_proj_expert_weight_key.format(expert_idx) + up_gate_proj_expert_weight_key_name = up_gate_proj_expert_weight_key.format(expert_idx) up_gate_proj_weights.append( - get_tensor(state_dict.pop(up_gate_proj_expert_weight_key.format(expert_idx))) + get_tensor( + state_dict.pop(up_gate_proj_expert_weight_key_name) + if up_gate_proj_expert_weight_key_name in state_dict + else up_gate_proj_expert_weight_key_name, + self.fd_config.parallel_config.model_name_or_path, + ) + ) + down_proj_weights.append( + get_tensor( + state_dict.pop(down_proj_expert_weight_key_name) + if down_proj_expert_weight_key_name in state_dict + else down_proj_expert_weight_key_name, + self.fd_config.parallel_config.model_name_or_path, + ) ) - down_proj_weights.append(get_tensor(state_dict.pop(down_proj_expert_weight_key.format(expert_idx)))) else: gate_expert_weight_key = up_gate_proj_expert_weight_key.replace("up_gate_proj", "gate_proj") up_expert_weight_key = up_gate_proj_expert_weight_key.replace("up_gate_proj", "up_proj") for j in range(self.num_local_experts): expert_idx = self.expert_id_offset + j - gate = get_tensor(state_dict.pop(gate_expert_weight_key.format(expert_idx))) - up = get_tensor(state_dict.pop(up_expert_weight_key.format(expert_idx))) + gate_expert_weight_key_name = gate_expert_weight_key.format(expert_idx) + up_expert_weight_key_name = up_expert_weight_key.format(expert_idx) + down_proj_expert_weight_key_name = down_proj_expert_weight_key.format(expert_idx) + gate = get_tensor( + state_dict.pop(gate_expert_weight_key_name) + if gate_expert_weight_key_name in state_dict + else gate_expert_weight_key_name, + self.fd_config.parallel_config.model_name_or_path, + ) + up = get_tensor( + state_dict.pop(up_expert_weight_key_name) + if up_expert_weight_key_name in state_dict + else up_expert_weight_key_name, + self.fd_config.parallel_config.model_name_or_path, + ) up_gate_proj_weights.append(paddle.concat([gate, up], axis=-1)) - down_proj_weights.append(get_tensor(state_dict.pop(down_proj_expert_weight_key.format(expert_idx)))) + down_proj_weights.append( + get_tensor( + state_dict.pop(down_proj_expert_weight_key_name) + if down_proj_expert_weight_key_name in state_dict + else down_proj_expert_weight_key_name, + self.fd_config.parallel_config.model_name_or_path, + ) + ) return up_gate_proj_weights, down_proj_weights def extract_moe_ffn_weights(self, state_dict: dict): diff --git a/fastdeploy/model_executor/layers/utils.py b/fastdeploy/model_executor/layers/utils.py index fa057965f..75171982f 100644 --- a/fastdeploy/model_executor/layers/utils.py +++ b/fastdeploy/model_executor/layers/utils.py @@ -37,7 +37,6 @@ if current_platform.is_cuda() and current_platform.available(): "And ensure the Paddle version supports FastDeploy's custom operators" ) -import re from fastdeploy import envs @@ -107,7 +106,7 @@ def _set_var_distributed(var: Tensor, split_axis: int): main_block._find_var_recursive(var.name).is_distributed = True -def get_tensor(input: Union[paddle.Tensor, np.ndarray, str]) -> paddle.Tensor: +def get_tensor(input: Union[paddle.Tensor, np.ndarray, str], model_path=None) -> paddle.Tensor: """ Return a corresponding PaddlePaddle tensor based on the type and content of the input. @@ -125,28 +124,9 @@ def get_tensor(input: Union[paddle.Tensor, np.ndarray, str]) -> paddle.Tensor: elif isinstance(input, np.ndarray): return paddle.to_tensor(input) elif isinstance(input, str): - if ".safetensors" in input: - match = re.match(r"\[(.*?)\](.*)", input) - if match: - key_name = match.group(1) - model_path = match.group(2) - from safetensors import safe_open + from fastdeploy.model_executor.load_weight_utils import load_reordered_experts - with safe_open(model_path, framework="np", device="cpu") as f: - if key_name in f.keys(): - weight = f.get_tensor(key_name) - weight = paddle.Tensor(weight, zero_copy=True) - weight = weight._copy_to(paddle.framework._current_expected_place(), False) - return weight - else: - return None - else: - if cache_params != "none": - tmp_key = input.split("/")[-1] - if tmp_key in c8_state_dict: - print(f"Loading {tmp_key} in extra C8_state_dict") - return paddle.to_tensor(c8_state_dict.pop(tmp_key)) - return paddle.load(input) + return load_reordered_experts(model_path, input) else: return input @@ -377,6 +357,7 @@ def create_and_set_parameter(layer: nn.Layer, name: str, tensor: paddle.Tensor): ) getattr(layer, name).set_value(tensor) + @functools.cache def create_empty_tensor(shape: Tuple[int, ...], dtype: Union[paddle.dtype, str]) -> paddle.Tensor: """ diff --git a/fastdeploy/model_executor/load_weight_utils.py b/fastdeploy/model_executor/load_weight_utils.py index f5ed5543e..f04dc9b81 100644 --- a/fastdeploy/model_executor/load_weight_utils.py +++ b/fastdeploy/model_executor/load_weight_utils.py @@ -32,6 +32,20 @@ from fastdeploy.model_executor.models.tp_utils import ( from fastdeploy.platforms import current_platform +def load_reordered_experts(model_path: str, key_name: str): + from safetensors import safe_open + + with open(os.path.join(model_path, "model.safetensors.index.json"), "r") as f: + weight_list = json.load(f)["weight_map"] + safetensor_path = os.path.join(model_path, weight_list[key_name]) + with safe_open(safetensor_path, framework="np", device="cpu") as f: + if key_name in f.keys(): + weight = f.get_tensor(key_name) + weight = paddle.Tensor(weight, zero_copy=True) + weight = weight._copy_to(paddle.framework._current_expected_place(), False) + return weight + + def load_ep_checkpoint(model_path: str, fd_config: FDConfig, return_numpy: bool = False): """ load ep checkpoint