mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
This commit is contained in:
@@ -228,19 +228,53 @@ class FusedMoE(nn.Layer):
|
|||||||
if is_ffn_merged:
|
if is_ffn_merged:
|
||||||
for i in range(self.num_local_experts):
|
for i in range(self.num_local_experts):
|
||||||
expert_idx = self.expert_id_offset + i
|
expert_idx = self.expert_id_offset + i
|
||||||
|
down_proj_expert_weight_key_name = down_proj_expert_weight_key.format(expert_idx)
|
||||||
|
up_gate_proj_expert_weight_key_name = up_gate_proj_expert_weight_key.format(expert_idx)
|
||||||
up_gate_proj_weights.append(
|
up_gate_proj_weights.append(
|
||||||
get_tensor(state_dict.pop(up_gate_proj_expert_weight_key.format(expert_idx)))
|
get_tensor(
|
||||||
|
state_dict.pop(up_gate_proj_expert_weight_key_name)
|
||||||
|
if up_gate_proj_expert_weight_key_name in state_dict
|
||||||
|
else up_gate_proj_expert_weight_key_name,
|
||||||
|
self.fd_config.parallel_config.model_name_or_path,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
down_proj_weights.append(
|
||||||
|
get_tensor(
|
||||||
|
state_dict.pop(down_proj_expert_weight_key_name)
|
||||||
|
if down_proj_expert_weight_key_name in state_dict
|
||||||
|
else down_proj_expert_weight_key_name,
|
||||||
|
self.fd_config.parallel_config.model_name_or_path,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
down_proj_weights.append(get_tensor(state_dict.pop(down_proj_expert_weight_key.format(expert_idx))))
|
|
||||||
else:
|
else:
|
||||||
gate_expert_weight_key = up_gate_proj_expert_weight_key.replace("up_gate_proj", "gate_proj")
|
gate_expert_weight_key = up_gate_proj_expert_weight_key.replace("up_gate_proj", "gate_proj")
|
||||||
up_expert_weight_key = up_gate_proj_expert_weight_key.replace("up_gate_proj", "up_proj")
|
up_expert_weight_key = up_gate_proj_expert_weight_key.replace("up_gate_proj", "up_proj")
|
||||||
for j in range(self.num_local_experts):
|
for j in range(self.num_local_experts):
|
||||||
expert_idx = self.expert_id_offset + j
|
expert_idx = self.expert_id_offset + j
|
||||||
gate = get_tensor(state_dict.pop(gate_expert_weight_key.format(expert_idx)))
|
gate_expert_weight_key_name = gate_expert_weight_key.format(expert_idx)
|
||||||
up = get_tensor(state_dict.pop(up_expert_weight_key.format(expert_idx)))
|
up_expert_weight_key_name = up_expert_weight_key.format(expert_idx)
|
||||||
|
down_proj_expert_weight_key_name = down_proj_expert_weight_key.format(expert_idx)
|
||||||
|
gate = get_tensor(
|
||||||
|
state_dict.pop(gate_expert_weight_key_name)
|
||||||
|
if gate_expert_weight_key_name in state_dict
|
||||||
|
else gate_expert_weight_key_name,
|
||||||
|
self.fd_config.parallel_config.model_name_or_path,
|
||||||
|
)
|
||||||
|
up = get_tensor(
|
||||||
|
state_dict.pop(up_expert_weight_key_name)
|
||||||
|
if up_expert_weight_key_name in state_dict
|
||||||
|
else up_expert_weight_key_name,
|
||||||
|
self.fd_config.parallel_config.model_name_or_path,
|
||||||
|
)
|
||||||
up_gate_proj_weights.append(paddle.concat([gate, up], axis=-1))
|
up_gate_proj_weights.append(paddle.concat([gate, up], axis=-1))
|
||||||
down_proj_weights.append(get_tensor(state_dict.pop(down_proj_expert_weight_key.format(expert_idx))))
|
down_proj_weights.append(
|
||||||
|
get_tensor(
|
||||||
|
state_dict.pop(down_proj_expert_weight_key_name)
|
||||||
|
if down_proj_expert_weight_key_name in state_dict
|
||||||
|
else down_proj_expert_weight_key_name,
|
||||||
|
self.fd_config.parallel_config.model_name_or_path,
|
||||||
|
)
|
||||||
|
)
|
||||||
return up_gate_proj_weights, down_proj_weights
|
return up_gate_proj_weights, down_proj_weights
|
||||||
|
|
||||||
def extract_moe_ffn_weights(self, state_dict: dict):
|
def extract_moe_ffn_weights(self, state_dict: dict):
|
||||||
|
@@ -37,7 +37,6 @@ if current_platform.is_cuda() and current_platform.available():
|
|||||||
"And ensure the Paddle version supports FastDeploy's custom operators"
|
"And ensure the Paddle version supports FastDeploy's custom operators"
|
||||||
)
|
)
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from fastdeploy import envs
|
from fastdeploy import envs
|
||||||
|
|
||||||
@@ -107,7 +106,7 @@ def _set_var_distributed(var: Tensor, split_axis: int):
|
|||||||
main_block._find_var_recursive(var.name).is_distributed = True
|
main_block._find_var_recursive(var.name).is_distributed = True
|
||||||
|
|
||||||
|
|
||||||
def get_tensor(input: Union[paddle.Tensor, np.ndarray, str]) -> paddle.Tensor:
|
def get_tensor(input: Union[paddle.Tensor, np.ndarray, str], model_path=None) -> paddle.Tensor:
|
||||||
"""
|
"""
|
||||||
Return a corresponding PaddlePaddle tensor based on the type and content of the input.
|
Return a corresponding PaddlePaddle tensor based on the type and content of the input.
|
||||||
|
|
||||||
@@ -125,28 +124,9 @@ def get_tensor(input: Union[paddle.Tensor, np.ndarray, str]) -> paddle.Tensor:
|
|||||||
elif isinstance(input, np.ndarray):
|
elif isinstance(input, np.ndarray):
|
||||||
return paddle.to_tensor(input)
|
return paddle.to_tensor(input)
|
||||||
elif isinstance(input, str):
|
elif isinstance(input, str):
|
||||||
if ".safetensors" in input:
|
from fastdeploy.model_executor.load_weight_utils import load_reordered_experts
|
||||||
match = re.match(r"\[(.*?)\](.*)", input)
|
|
||||||
if match:
|
|
||||||
key_name = match.group(1)
|
|
||||||
model_path = match.group(2)
|
|
||||||
from safetensors import safe_open
|
|
||||||
|
|
||||||
with safe_open(model_path, framework="np", device="cpu") as f:
|
return load_reordered_experts(model_path, input)
|
||||||
if key_name in f.keys():
|
|
||||||
weight = f.get_tensor(key_name)
|
|
||||||
weight = paddle.Tensor(weight, zero_copy=True)
|
|
||||||
weight = weight._copy_to(paddle.framework._current_expected_place(), False)
|
|
||||||
return weight
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
if cache_params != "none":
|
|
||||||
tmp_key = input.split("/")[-1]
|
|
||||||
if tmp_key in c8_state_dict:
|
|
||||||
print(f"Loading {tmp_key} in extra C8_state_dict")
|
|
||||||
return paddle.to_tensor(c8_state_dict.pop(tmp_key))
|
|
||||||
return paddle.load(input)
|
|
||||||
else:
|
else:
|
||||||
return input
|
return input
|
||||||
|
|
||||||
@@ -377,6 +357,7 @@ def create_and_set_parameter(layer: nn.Layer, name: str, tensor: paddle.Tensor):
|
|||||||
)
|
)
|
||||||
getattr(layer, name).set_value(tensor)
|
getattr(layer, name).set_value(tensor)
|
||||||
|
|
||||||
|
|
||||||
@functools.cache
|
@functools.cache
|
||||||
def create_empty_tensor(shape: Tuple[int, ...], dtype: Union[paddle.dtype, str]) -> paddle.Tensor:
|
def create_empty_tensor(shape: Tuple[int, ...], dtype: Union[paddle.dtype, str]) -> paddle.Tensor:
|
||||||
"""
|
"""
|
||||||
|
@@ -32,6 +32,20 @@ from fastdeploy.model_executor.models.tp_utils import (
|
|||||||
from fastdeploy.platforms import current_platform
|
from fastdeploy.platforms import current_platform
|
||||||
|
|
||||||
|
|
||||||
|
def load_reordered_experts(model_path: str, key_name: str):
    """Load one named tensor from a sharded safetensors checkpoint.

    The shard that holds ``key_name`` is looked up in the ``weight_map`` of
    ``model.safetensors.index.json`` inside ``model_path``. The tensor is then
    read from that shard as a NumPy array, wrapped in a ``paddle.Tensor`` and
    copied to Paddle's current expected place.

    Args:
        model_path: Directory containing ``model.safetensors.index.json`` and
            the shard files it references.
        key_name: Name of the tensor to load.

    Returns:
        The loaded ``paddle.Tensor``, or ``None`` when the shard named by the
        index does not actually contain ``key_name``.

    Raises:
        FileNotFoundError: If the index file does not exist.
        KeyError: If ``key_name`` is not listed in the index ``weight_map``.
    """
    index_path = os.path.join(model_path, "model.safetensors.index.json")
    # JSON text is UTF-8; do not depend on the locale's default encoding.
    with open(index_path, "r", encoding="utf-8") as index_file:
        weight_map = json.load(index_file)["weight_map"]
    # Resolve the shard before importing safetensors so an unknown key fails
    # fast with a KeyError.
    safetensor_path = os.path.join(model_path, weight_map[key_name])

    from safetensors import safe_open

    with safe_open(safetensor_path, framework="np", device="cpu") as shard:
        if key_name not in shard.keys():
            # Index and shard disagree: report "not found" explicitly instead
            # of silently falling off the end of the function.
            return None
        # Keep the conversion inside the context manager: the zero-copy
        # wrapper is built while the shard is still open.
        weight = shard.get_tensor(key_name)
        weight = paddle.Tensor(weight, zero_copy=True)
        weight = weight._copy_to(paddle.framework._current_expected_place(), False)
        return weight
|
||||||
|
|
||||||
|
|
||||||
def load_ep_checkpoint(model_path: str, fd_config: FDConfig, return_numpy: bool = False):
|
def load_ep_checkpoint(model_path: str, fd_config: FDConfig, return_numpy: bool = False):
|
||||||
"""
|
"""
|
||||||
load ep checkpoint
|
load ep checkpoint
|
||||||
|
Reference in New Issue
Block a user