""" # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ import json import os import time import numpy as np from fastdeploy.config import FDConfig from fastdeploy.inter_communicator import IPCSignal class RedundantExpertWorkload: """Redundant Expert Workload""" def __init__(self, redundant_expert_meta_dir="/tmp/redundant_expert_meta"): self.update_timestamp = time.time() self.tokens_per_expert_stats_list = None self.ep_rank_to_expert_id_list = None self.expert_id_to_ep_rank_array = None self.expert_in_rank_num_list = None self.cost_milliseconds = 0 self.meta_file_name = f"{redundant_expert_meta_dir}/rearrange-experts.json" if not os.path.exists(redundant_expert_meta_dir): os.makedirs(redundant_expert_meta_dir, exist_ok=True) def __json__(self): return self.__dict__ def dump(self): """Dump the object to a JSON file.""" begin = time.time() try: with open(self.meta_file_name, "w") as fout: json.dump(self.__dict__, fout) except Exception as e: return f"redundant_expert: dump expert workload failed, {e}" cost_time = int((time.time() - begin) * 1000 * 1000) return f"redundant_expert: dump expert workload result in {cost_time} us" def load(self): """Load the object from a JSON file.""" if not os.path.exists(self.meta_file_name): return {}, f"redundant_expert: file {self.meta_file_name} is not exists" try: with open(self.meta_file_name, "r") as fin: meta = json.load(fin) self.__dict__.update(meta) return self.__json__(), "ok" except Exception as e: return {}, f"redundant_expert: load file {self.meta_file_name} failed, {e}" def init_eplb_signals(config: FDConfig, ipc_signal_suffix): """ Initialize shared memory to indicate eplb status """ if config.parallel_config.local_data_parallel_id == 0: # rearrange_experts_status Record the expert's rearrangement status rearrange_experts_array = np.zeros([1], dtype=np.int32) _ = IPCSignal( name="rearrange_experts_status", array=rearrange_experts_array, dtype=np.int32, suffix=ipc_signal_suffix, create=True, ) # Record all DP rank IPs when receiving expert rearrangement requests rearrange_experts_ips_size_array = np.zeros([1], dtype=np.int32) _ = IPCSignal( name="rearrange_experts_ips_size", array=rearrange_experts_ips_size_array, dtype=np.int32, suffix=ipc_signal_suffix, create=True, ) _ = IPCSignal( name="rearrange_experts_ips_list", shm_size=config.eplb_config.redundant_expert_ip_shm_size, suffix=ipc_signal_suffix, create=True, ) # Receive signals for updating weights signal_update_weight_from_tensor = np.zeros([1], dtype=np.int32) _ = IPCSignal( name="signal_update_weight_from_tensor", array=signal_update_weight_from_tensor, dtype=np.int32, suffix=ipc_signal_suffix, create=True, ) # Record expert workload experts_token_stats = np.zeros( (config.model_config.num_hidden_layers, config.model_config.moe_num_experts), dtype=np.int32, ) _ = IPCSignal( name="all_experts_token_stats", array=experts_token_stats, dtype=np.int32, suffix=ipc_signal_suffix, create=True, ) _ = IPCSignal( name="local_experts_token_stats", array=experts_token_stats, dtype=np.int32, suffix=ipc_signal_suffix, create=True, ) # Receive signals for loading weights signal_update_weight_from_disk = np.zeros([1], dtype=np.int32) _ = IPCSignal( name="signal_update_weight_from_disk", array=signal_update_weight_from_disk, dtype=np.int32, suffix=ipc_signal_suffix, create=True, ) # Receive signals for clearing expert loads clear_experts_token_stats = np.zeros([1], dtype=np.int32) _ = IPCSignal( name="signal_clear_experts_token_stats", array=clear_experts_token_stats, dtype=np.int32, suffix=ipc_signal_suffix, create=True, ) result_update_weight_from_disk = np.zeros([1], dtype=np.int32) _ = IPCSignal( name="result_update_weight_from_disk", array=result_update_weight_from_disk, dtype=np.int32, suffix=ipc_signal_suffix, create=True, ) if __name__ == "__main__": print(RedundantExpertWorkload("/tmp").load())