Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-30 11:26:39 +08:00)

Commit: polish code with new pre-commit rule (#2923)

The hunks below reformat the redundant expert manager (MoE expert-parallel placement tables) to the new pre-commit style: multi-line signatures, parenthesized asserts and return tuples, trailing commas, and double quotes. No behavior changes.
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
+
 """redundant expert manger."""
 from typing import Optional, Tuple
 
@@ -28,8 +29,13 @@ class RedundantExpertManger:
     RedundantExpertManger
     """
 
-    def __init__(self, n_routed_experts: int, num_hidden_layers: int,
-                 redundant_experts_num: int, ep_size: int) -> None:
+    def __init__(
+        self,
+        n_routed_experts: int,
+        num_hidden_layers: int,
+        redundant_experts_num: int,
+        ep_size: int,
+    ) -> None:
         """Initialize a redundant expert manager"""
         self.num_expert = n_routed_experts
         self.redundant_experts_num = redundant_experts_num
@@ -41,26 +47,33 @@ class RedundantExpertManger:
         self.num_groups = 1
 
         self.export_per_rank = self.num_replicas // ep_size
-        assert self.num_replicas % ep_size == 0, \
-            f"num_replicas must be divisible by ep_size, \
+        assert (
+            self.num_replicas % ep_size == 0
+        ), f"num_replicas must be divisible by ep_size, \
             but got num_replicas = {self.num_replicas}, ep_size = {ep_size}"
 
-        self.model_ep_rank_to_expert_id_list = paddle.full(shape=[
-            self.num_hidden_layers,
-            self.num_expert + self.redundant_experts_num
-        ],
-            fill_value=-1,
-            dtype="int32")
-        self.model_expert_id_to_ep_rank_array = paddle.full(shape=[
-            self.num_hidden_layers, self.num_expert,
-            self.redundant_experts_num + 1
-        ],
-            fill_value=-1,
-            dtype="int32")
+        self.model_ep_rank_to_expert_id_list = paddle.full(
+            shape=[
+                self.num_hidden_layers,
+                self.num_expert + self.redundant_experts_num,
+            ],
+            fill_value=-1,
+            dtype="int32",
+        )
+        self.model_expert_id_to_ep_rank_array = paddle.full(
+            shape=[
+                self.num_hidden_layers,
+                self.num_expert,
+                self.redundant_experts_num + 1,
+            ],
+            fill_value=-1,
+            dtype="int32",
+        )
         self.model_expert_in_rank_num_list = paddle.full(
             shape=[self.num_hidden_layers, self.num_expert],
             fill_value=0,
-            dtype="int32")
+            dtype="int32",
+        )
         # self.model_ep_rank_to_expert_id_list = paddle.arange(
         #     self.num_expert + self.redundant_experts_num,
         #     dtype="int32").tile([self.num_hidden_layers, 1])
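Note: the tables built in this hunk carry the full placement state. A minimal sketch of their shapes and the -1 padding convention, with toy sizes (the sketch itself is not part of the patch; the semantics of each table are inferred from the commented-out paddle.arange initializer and the shapes above):

import paddle

num_hidden_layers = 2      # toy sizes, for illustration only
num_expert = 4             # logical (routed) experts per layer
redundant_experts_num = 2  # extra physical copies spread over the experts

# One entry per (layer, physical slot); the value is the logical expert id
# hosted by that slot, -1 while unassigned.
ep_rank_to_expert_id_list = paddle.full(
    shape=[num_hidden_layers, num_expert + redundant_experts_num],
    fill_value=-1,
    dtype="int32",
)

# For each (layer, logical expert): the physical slots serving it. An expert
# has at most redundant_experts_num + 1 replicas; unused entries stay -1.
expert_id_to_ep_rank_array = paddle.full(
    shape=[num_hidden_layers, num_expert, redundant_experts_num + 1],
    fill_value=-1,
    dtype="int32",
)

print(ep_rank_to_expert_id_list.shape)   # [2, 6]
print(expert_id_to_ep_rank_array.shape)  # [2, 4, 3]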
@@ -73,20 +86,18 @@ class RedundantExpertManger:
         #     dtype="int32")
 
         self.model_tokens_per_expert_stats_list = paddle.ones(
-            shape=[self.num_hidden_layers, self.num_expert], dtype="int32")
+            shape=[self.num_hidden_layers, self.num_expert], dtype="int32"
+        )
 
-        rank_expert_list, \
-            logical_to_physical_map, \
-            expert_count = rebalance_experts(
-                self.model_tokens_per_expert_stats_list.cpu().numpy(),
-                self.num_replicas,
-                self.num_groups,
-                self.num_nodes,
-                self.num_gpus)
+        rank_expert_list, logical_to_physical_map, expert_count = rebalance_experts(
+            self.model_tokens_per_expert_stats_list.cpu().numpy(),
+            self.num_replicas,
+            self.num_groups,
+            self.num_nodes,
+            self.num_gpus,
+        )
 
-        self.update_expert_rank_table(rank_expert_list,
-                                      logical_to_physical_map, expert_count,
-                                      False)
+        self.update_expert_rank_table(rank_expert_list, logical_to_physical_map, expert_count, False)
 
         logger.info(
             f"moe experts table manager init successfully, ep_size {ep_size} \
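Note: rebalance_experts is imported elsewhere in this file, so only its call site is visible here. A hypothetical stand-in that produces outputs of the shape this call site consumes (round-robin placement instead of a real load-aware balancer):

import numpy as np

def toy_rebalance_experts(tokens_per_expert, num_replicas, num_groups, num_nodes, num_gpus):
    """Hypothetical stand-in; only the output shapes and the -1 padding
    convention are taken from the call site above."""
    num_layers, num_expert = tokens_per_expert.shape
    redundant = num_replicas - num_expert
    # rank_expert_list: logical expert id hosted by each physical slot.
    # Round-robin here; a real balancer would weight by tokens_per_expert.
    rank_expert_list = np.tile(
        np.arange(num_replicas, dtype=np.int32) % num_expert, (num_layers, 1)
    )
    # Inverse map: logical expert -> its physical slots, padded with -1,
    # plus a per-expert replica count.
    logical_to_physical_map = np.full((num_layers, num_expert, redundant + 1), -1, dtype=np.int32)
    expert_count = np.zeros((num_layers, num_expert), dtype=np.int32)
    for layer in range(num_layers):
        for slot, expert in enumerate(rank_expert_list[layer]):
            logical_to_physical_map[layer, expert, expert_count[layer, expert]] = slot
            expert_count[layer, expert] += 1
    return rank_expert_list, logical_to_physical_map, expert_count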
@@ -99,10 +110,12 @@ class RedundantExpertManger:
         """
         get_ep_rank_to_expert_id_list_by_layer
         """
-        return self.model_ep_rank_to_expert_id_list[layer_id], \
-            self.model_expert_id_to_ep_rank_array[layer_id], \
-            self.model_expert_in_rank_num_list[layer_id], \
-            self.model_tokens_per_expert_stats_list[layer_id]
+        return (
+            self.model_ep_rank_to_expert_id_list[layer_id],
+            self.model_expert_id_to_ep_rank_array[layer_id],
+            self.model_expert_in_rank_num_list[layer_id],
+            self.model_tokens_per_expert_stats_list[layer_id],
+        )
 
     def get_ep_rank_to_expert_id_list(
         self, layer_id: int
@@ -110,28 +123,33 @@ class RedundantExpertManger:
         """
         get_ep_rank_to_expert_id_list
         """
-        return self.model_ep_rank_to_expert_id_list[layer_id], \
-            self.model_expert_id_to_ep_rank_array[layer_id], \
-            self.model_expert_in_rank_num_list[layer_id], \
-            self.model_tokens_per_expert_stats_list[layer_id]
+        return (
+            self.model_ep_rank_to_expert_id_list[layer_id],
+            self.model_expert_id_to_ep_rank_array[layer_id],
+            self.model_expert_in_rank_num_list[layer_id],
+            self.model_tokens_per_expert_stats_list[layer_id],
+        )
 
     def get_expert_tokens_stats(
-        self,
-        verbose: bool = False,
-        clear_stat: bool = False
-    ) -> Tuple[np.ndarray, Optional[np.ndarray], Optional[np.ndarray],
-               Optional[np.ndarray]]:
+        self, verbose: bool = False, clear_stat: bool = False
+    ) -> Tuple[np.ndarray, Optional[np.ndarray], Optional[np.ndarray], Optional[np.ndarray]]:
         """
         get_per_expert_tokens_stats
         """
         try:
             if verbose:
-                return self.model_tokens_per_expert_stats_list.cpu().numpy(), \
-                    self.model_expert_id_to_ep_rank_array.cpu().numpy(), \
-                    self.model_ep_rank_to_expert_id_list.cpu().numpy(), \
-                    self.model_expert_in_rank_num_list.cpu().numpy()
-            return self.model_tokens_per_expert_stats_list.cpu().numpy(
-            ), None, None, None
+                return (
+                    self.model_tokens_per_expert_stats_list.cpu().numpy(),
+                    self.model_expert_id_to_ep_rank_array.cpu().numpy(),
+                    self.model_ep_rank_to_expert_id_list.cpu().numpy(),
+                    self.model_expert_in_rank_num_list.cpu().numpy(),
+                )
+            return (
+                self.model_tokens_per_expert_stats_list.cpu().numpy(),
+                None,
+                None,
+                None,
+            )
         finally:
             if clear_stat:
                 self.model_tokens_per_expert_stats_list.zero_()
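Note: the try/finally shape kept by this hunk is the interesting part: the return value is materialized first (and .cpu().numpy() copies it off-device), then the finally block zeroes the live counters when clear_stat is set, so callers always see the pre-reset statistics. A standalone sketch of the same read-then-reset pattern:

import numpy as np

class StatsWindow:
    """Illustrative only; mirrors the control flow of get_expert_tokens_stats."""

    def __init__(self):
        self.counts = np.zeros(4, dtype=np.int32)

    def snapshot(self, clear_stat: bool = False) -> np.ndarray:
        try:
            # .copy() matters: the finally block mutates self.counts after
            # the return value has already been computed.
            return self.counts.copy()
        finally:
            if clear_stat:
                self.counts[:] = 0

w = StatsWindow()
w.counts += np.array([3, 0, 1, 2], dtype=np.int32)
print(w.snapshot(clear_stat=True))  # [3 0 1 2] - pre-reset statistics
print(w.counts)                     # [0 0 0 0] - cleared for the next window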
@@ -142,27 +160,28 @@ class RedundantExpertManger:
         """
         return self.model_expert_id_to_ep_rank_array.cpu().numpy()
 
-    def update_expert_rank_table(self,
-                                 rank_expert_list: np.ndarray,
-                                 logical_to_physical_map: np.ndarray,
-                                 expert_count: np.ndarray,
-                                 clear_stat: bool = True) -> None:
+    def update_expert_rank_table(
+        self,
+        rank_expert_list: np.ndarray,
+        logical_to_physical_map: np.ndarray,
+        expert_count: np.ndarray,
+        clear_stat: bool = True,
+    ) -> None:
         """
         update_expert_rank_table
         """
-        #update model info
-        self.model_ep_rank_to_expert_id_list.copy_(
-            paddle.to_tensor(rank_expert_list), True)
+        # update model info
+        self.model_ep_rank_to_expert_id_list.copy_(paddle.to_tensor(rank_expert_list), True)
         self.model_expert_id_to_ep_rank_array.fill_(-1)
-        self.model_expert_id_to_ep_rank_array[:, :, :logical_to_physical_map.shape[-1]] = \
-            paddle.to_tensor(logical_to_physical_map)
-        self.model_expert_in_rank_num_list.copy_(
-            paddle.to_tensor(expert_count), True)
+        self.model_expert_id_to_ep_rank_array[:, :, : logical_to_physical_map.shape[-1]] = paddle.to_tensor(
+            logical_to_physical_map
+        )
+        self.model_expert_in_rank_num_list.copy_(paddle.to_tensor(expert_count), True)
 
         # reset
         if clear_stat:
             self.model_tokens_per_expert_stats_list.zero_()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     print(RedundantExpertManger(64, 2, 8, 8).model_expert_id_to_ep_rank_array)
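Note: end to end, a caller would drive the manager roughly as the __main__ line above suggests. A hedged sketch using only names visible in this diff (it assumes the module's own imports, including rebalance_experts, are in scope):

# Same toy arguments as the __main__ demo: 64 routed experts, 2 hidden
# layers, 8 redundant experts, expert-parallel size 8.
manager = RedundantExpertManger(64, 2, 8, 8)

# Read the per-expert token counters accumulated since the last rebalance.
tokens, _, _, _ = manager.get_expert_tokens_stats(verbose=False, clear_stat=False)

# Recompute the placement from live statistics and install it; clear_stat
# defaults to True, so the counters start a fresh window.
rank_expert_list, logical_to_physical_map, expert_count = rebalance_experts(
    tokens, manager.num_replicas, manager.num_groups, manager.num_nodes, manager.num_gpus
)
manager.update_expert_rank_table(rank_expert_list, logical_to_physical_map, expert_count)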