From 85090ed79984ff444ebf95d6641abb962f84d158 Mon Sep 17 00:00:00 2001 From: RichardWooSJTU <37864677+RichardWooSJTU@users.noreply.github.com> Date: Tue, 1 Jul 2025 10:18:25 +0800 Subject: [PATCH] remove unuseful scripts (#2652) --- scripts/convert_ep_state_from_ep8.py | 59 ---- scripts/convert_ep_state_from_ep8.sh | 6 - scripts/convert_ep_state_from_tp8.py | 86 ------ scripts/convert_ep_state_from_tp8.sh | 3 - scripts/convert_ep_to_safetensor.py | 252 ------------------ .../run_prediction_ep_decoder_multi_node.sh | 22 -- ...n_prediction_ep_decoder_multi_node_perf.sh | 28 -- ..._prediction_ep_decoder_single_node_perf.sh | 22 -- scripts/run_prediction_ep_prefill_perf.sh | 19 -- 9 files changed, 497 deletions(-) delete mode 100644 scripts/convert_ep_state_from_ep8.py delete mode 100644 scripts/convert_ep_state_from_ep8.sh delete mode 100644 scripts/convert_ep_state_from_tp8.py delete mode 100644 scripts/convert_ep_state_from_tp8.sh delete mode 100644 scripts/convert_ep_to_safetensor.py delete mode 100644 scripts/run_prediction_ep_decoder_multi_node.sh delete mode 100644 scripts/run_prediction_ep_decoder_multi_node_perf.sh delete mode 100644 scripts/run_prediction_ep_decoder_single_node_perf.sh delete mode 100644 scripts/run_prediction_ep_prefill_perf.sh diff --git a/scripts/convert_ep_state_from_ep8.py b/scripts/convert_ep_state_from_ep8.py deleted file mode 100644 index 66452e8f9..000000000 --- a/scripts/convert_ep_state_from_ep8.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -import paddle -import paddle.distributed as dist -from glob import glob -import os - -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument("--model_dir", type=str, required=True) -args = parser.parse_args() - -rank = dist.get_rank() -ep_num = dist.get_world_size() -print("rank: ", rank) -# merge tpn -> tp1 -model_dir = args.model_dir -save_merged_pp_dir = os.path.join(model_dir, "merged_tp1_state_split") -os.makedirs(save_merged_pp_dir, exist_ok=True) - -model_path_pp = glob(os.path.join(model_dir, "shangxianv1_ep_hadamard_quantmodel_to_eval_pp*")) -for p in model_path_pp: - model_path_ep = os.path.join(p, f"model_state.ep0{rank}.pdparams") - print(p, model_path_ep) - - state_dicts = paddle.load(model_path_ep, return_numpy=True) - - print("merge ep") - print("p: ", p) - for k, v in state_dicts.items(): - v = paddle.to_tensor(v) - if "mlp.experts" in k: - k_list = k.split(".") - export_id = rank * ep_num + int(k_list[5]) - k_list[5] = str(export_id) - k = ".".join(k_list) - print(f"key: {k}") - save_split_path = os.path.join(save_merged_pp_dir, k) - paddle.save(v, save_split_path) - elif rank == 0: - save_split_path = os.path.join(save_merged_pp_dir, k) - paddle.save(paddle.to_tensor(v), save_split_path) - print(f"merge {p} end") -print("merge end") diff --git a/scripts/convert_ep_state_from_ep8.sh b/scripts/convert_ep_state_from_ep8.sh deleted file mode 100644 index 0639c14f7..000000000 --- a/scripts/convert_ep_state_from_ep8.sh +++ /dev/null @@ -1,6 +0,0 @@ - -export devices=0,1,2,3,4,5,6,7 - - -python -m paddle.distributed.launch --gpus ${devices} convert_ep_state_from_ep8.py --model_dir /path/to/model_dir - diff --git a/scripts/convert_ep_state_from_tp8.py b/scripts/convert_ep_state_from_tp8.py deleted file mode 100644 index 69bb27d0d..000000000 --- a/scripts/convert_ep_state_from_tp8.py +++ /dev/null @@ -1,86 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -import paddle -import paddle.distributed as dist -import pdb -from glob import glob -import os -import numpy as np - -import argparse - -parser = argparse.ArgumentParser() -parser.add_argument("--model_dir", type=str, required=True) -args = parser.parse_args() - -rank = dist.get_rank() -print("rank: ", rank) -# merge tpn -> tp1 -model_dir = args.model_dir -model_path_pp = glob(os.path.join(model_dir, f"pp{rank}")) -model_path_pp_tp = [] -for p in model_path_pp: - model_path_tp = glob(os.path.join(p, "model_state*")) - model_path_tp = sorted(model_path_tp) - save_merged_pp_path = os.path.join(p, "merged_tp1_state.pdparams") - save_merged_pp_dir = os.path.join(model_dir, "merged_tp1_state_split") - os.makedirs(save_merged_pp_dir, exist_ok=True) - print(p, model_path_tp) - - state_dicts = [paddle.load(path, return_numpy=True) for path in model_path_tp] - state = state_dicts[0] - - print("merge tp") - print("p: ", p) - for k, v in state.items(): - save_split_path = os.path.join(save_merged_pp_dir, k) - state_now = [] - for i in range(len(state_dicts)): - state_now.append(state_dicts[i][k]) - print("k: ", k, ", v.shape: ", v.shape) - if "qkv_proj" in k: - """not need prmt""" - # qkv not prmt - ori_q = [s[:, :1024] for s in state_now] - ori_k = [s[:, 1024:1152] for s in state_now] - ori_v = [s[:, 1152:] for s in state_now] - new_q = np.concatenate(ori_q, axis=1) - new_k = np.concatenate(ori_k, axis=1) - new_v = np.concatenate(ori_v, axis=1) - print(new_q.shape) - print(new_k.shape) - print(new_v.shape) - new_w = np.concatenate([new_q, new_k, new_v], axis=1) - # new_w = np.concatenate(state_now, axis=1) - elif "o_proj" in k or "down_proj" in k: - new_w = np.concatenate(state_now, axis=0) - elif "embed_tokens" in k: - new_w = np.concatenate(state_now, axis=0) - elif "up_gate_proj" in k: - dim = state_now[0].shape[1] - half_ffn1_1 = [s[:, :(dim // 2)] for s in state_now] - half_ffn1_2 = [s[:, (dim // 2):] for s in state_now] - new_ffn1_1 = np.concatenate(half_ffn1_1, axis=1) - new_ffn1_2 = np.concatenate(half_ffn1_2, axis=1) - new_w = np.concatenate([new_ffn1_1, new_ffn1_2], axis=1) - elif "lm_head" in k or "mtp_linear_proj" in k: - new_w = np.concatenate(state_now, axis=1) - else: - new_w = v - print("merged_shape: ", new_w.shape) - paddle.save(paddle.to_tensor(new_w), save_split_path) - print("merge end") \ No newline at end of file diff --git a/scripts/convert_ep_state_from_tp8.sh b/scripts/convert_ep_state_from_tp8.sh deleted file mode 100644 index e3f36dae2..000000000 --- a/scripts/convert_ep_state_from_tp8.sh +++ /dev/null @@ -1,3 +0,0 @@ -export devices=0,1,2,3,4,5,6,7 - -python -m paddle.distributed.launch --gpus ${devices} convert_ep_state_from_tp8.py --model_dir /path/to/model_dir \ No newline at end of file diff --git a/scripts/convert_ep_to_safetensor.py b/scripts/convert_ep_to_safetensor.py deleted file mode 100644 index a6dcb2837..000000000 --- a/scripts/convert_ep_to_safetensor.py +++ /dev/null @@ -1,252 +0,0 @@ -""" -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -import paddle -import os -from paddlenlp.trainer import strtobool -from efficientllm.models.utils import load_checkpoint -from efficientllm.inference_args import InferenceArgs -from paddlenlp.utils.log import logger -from efficientllm.models.configuration import ErnieBotConfig -from efficientllm.models.tokenizer import ErnieBotTokenizer -from safetensors.numpy import save_file as safe_save_file -from paddlenlp.utils.env import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME -import shutil -import argparse -import importlib -import json -from paddlenlp.transformers.model_utils import shard_checkpoint - -MODEL_LIB_NAMES = [ - "efficientllm.models.modeling_ernie_bot", -] - - -def parse_arguments(): - """ - parse_arguments - """ - parser = argparse.ArgumentParser() - parser.add_argument( - "--model_name_or_path", - default=None, - required=True, - help="The directory of model.", - ) - parser.add_argument( - "--output_dir", - default="merged_output", - required=True, - help="The directory of merged model output.", - ) - parser.add_argument( - "--safe_serialization", - type=strtobool, - default="True", - help="Whether merge the model into safetensors format.", - ) - parser.add_argument( - "--predict_model_type", - type=str, - default="", - help="Quantization type for the model.", - ) - - parser.add_argument( - "--draft_type", - type=str, - default=None, - choices=["autoregressive", "inference_with_reference", "hydra", "mtp"], - help="Quantization type for the model.", - ) - - parser.add_argument( - "--moe_quant_type", - default="default", - type=str, - choices=["weight_only_int4", "weight_only_int8", "w4a8", "fp8", "default"], - help="quant type for moe part", - ) - - parser.add_argument( - "--use_ep", - type=strtobool, - default="True", - help="Whether merge the model into safetensors format.", - ) - parser.add_argument("--dtype", type=str, default="bfloat16") - return parser.parse_args() - - -def get_model_cls(config): - """ - Get model class from model configuration. - """ - init_class = "ErnieBotFusedModel" - for lib_name in MODEL_LIB_NAMES: - eb_lib = importlib.import_module(lib_name) - if hasattr(eb_lib, init_class): - cls = getattr(eb_lib, init_class) - return cls - - raise RuntimeError(f"Cannot find model architecture({init_class}) from eb_lib") - - -def save_safetensors(state_dict, args): - """ - save_safetensors - """ - logger.info("Move to numpy.") - for k in list(state_dict.keys()): - if isinstance(state_dict[k], paddle.Tensor): - state_dict[k] = state_dict.pop(k).cpu().numpy() - - logger.info("Save safetensors files.") - shards, index = shard_checkpoint( - state_dict, - max_shard_size="5GB", - weights_name=SAFE_WEIGHTS_NAME, - shard_format="naive", - ) - for shard_file, shard in shards.items(): - save_file = os.path.join(args.output_dir, shard_file) - logger.info(f"Saving {save_file}") - safe_save_file(shard, save_file, metadata={"format": "np"}) - - save_index_file = os.path.join(args.output_dir, SAFE_WEIGHTS_INDEX_NAME) - with open(save_index_file, "w", encoding="utf-8") as f: - content = json.dumps(index, indent=2) + "\n" - f.write(content) - - -def quanted_tensor(cls, state_dict, config): - """ - quanted_tensor - """ - name_action_mappings = cls._get_tensor_quantization_mappings(config) - state_keys_map = cls._resolve_prefix_keys( - name_action_mappings.keys(), state_dict.keys() - ) - for k, v in state_keys_map.items(): - name_action_mappings[v] = name_action_mappings.pop(k) - state_dict_to_save = {} - from efficientllm.layers.utils import get_tensor - from tqdm import tqdm - for key in tqdm(state_dict.keys(), desc="process quantized weights "): - tensor_path = state_dict[key] - if key in name_action_mappings: - ret = state_dict[key] - action = name_action_mappings.pop(key) - quanted_weight_tensor, weight_scale_tensor = action(get_tensor(ret)) - if quanted_weight_tensor._is_initialized(): - state_dict_to_save[key + ".quant_weight"] = quanted_weight_tensor.cpu() - if weight_scale_tensor._is_initialized(): - state_dict_to_save[key + ".quant_scale"] = weight_scale_tensor.cpu() - else: - state_dict_to_save[key] = quanted_weight_tensor.cpu() - else: - state_dict_to_save[key] = get_tensor(tensor_path).cpu() - - if len(name_action_mappings) > 0: - for x in name_action_mappings.keys(): - logger.debug( - f"key <{x}> need to merge tensor parallel but we can't find in model state." - ) - return state_dict_to_save - - -def get_quant_type(args): - """ - get_quant_type - """ - quant_type = args.predict_model_type.lower() - if quant_type == "default": - quant_type = "" - moe_quant_type = args.moe_quant_type.lower() - if moe_quant_type == "default": - moe_quant_type = "" - paddle.set_default_dtype(args.dtype) - offline_args = InferenceArgs( - quant_type=quant_type, - num_layers=1, - num_attention_heads=1, - num_key_value_heads=1, - hidden_size=1, - ffn_hidden_size=1, - mp_rank=1, - mp_size=1, - ) - weight_dtype, act_dtype, cachekv_dtype = ( - offline_args.weight_dtype, - offline_args.act_dtype, - offline_args.cachekv_dtype, - ) - return weight_dtype, act_dtype, cachekv_dtype, quant_type, moe_quant_type - - -def main(): - """ - main - """ - args = parse_arguments() - tokenizer = ErnieBotTokenizer.from_pretrained(args.model_name_or_path) - config = ErnieBotConfig.from_pretrained(args.model_name_or_path) - ( - config.weight_dtype, - config.act_dtype, - config.cachekv_dtype, - config.quant_type, - config.moe_quant_type, - ) = get_quant_type(args) - config.is_mtp = args.draft_type in ["eagle", "mtp"] - config.use_ep = args.use_ep - cls = get_model_cls(config) - # load - state_dict = load_checkpoint( - args.model_name_or_path, cls, config, return_numpy=True - ) - import time - - start = time.perf_counter() - state_dict_to_save = quanted_tensor(cls=cls, state_dict=state_dict, config=config) - end = time.perf_counter() - logger.info("Finish Quantize.") - logger.info(f"load和量化耗时: {end - start:.6f} 秒") - - logger.info("Begin to save model") - os.makedirs(args.output_dir, exist_ok=True) - start = time.perf_counter() - if not args.safe_serialization: - paddle.save( - state_dict_to_save, - os.path.join(args.output_dir, "model_state.pdparams"), - ) - else: - save_safetensors(state_dict_to_save, args) - - config.save_pretrained(args.output_dir) - tokenizer.save_pretrained(args.output_dir) - if config.moe_quant_type == "w4a8": - # cp act_scales.json - shutil.copy(args.model_name_or_path + '/act_scales.json', args.output_dir) - shutil.copy(args.model_name_or_path + '/weight_scales.json', args.output_dir) - end = time.perf_counter() - logger.info(f"save耗时: {end - start:.6f} 秒") - logger.info("Finish.") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/scripts/run_prediction_ep_decoder_multi_node.sh b/scripts/run_prediction_ep_decoder_multi_node.sh deleted file mode 100644 index d327ae5fd..000000000 --- a/scripts/run_prediction_ep_decoder_multi_node.sh +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# export IP_LIST='10.95.244.83,10.95.244.82' -export IP_LIST='10.95.244.83,10.95.244.82,10.95.246.141,10.95.246.145' -# export IP_LIST='10.95.244.83,10.95.244.82,10.95.246.141,10.95.246.145,10.95.246.162,10.95.247.31,10.95.247.39,10.95.246.158' - -mpirun \ ---host $IP_LIST \ -bash run_prediction_ep_decoder.sh ${1} ${2} ${BATCH_SIZE:-1} ${USE_MICRO_BATCH:-"False"} $IP_LIST diff --git a/scripts/run_prediction_ep_decoder_multi_node_perf.sh b/scripts/run_prediction_ep_decoder_multi_node_perf.sh deleted file mode 100644 index ef83de95f..000000000 --- a/scripts/run_prediction_ep_decoder_multi_node_perf.sh +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# export IP_LIST='10.95.244.83,10.95.244.82' -export IP_LIST='10.95.244.83,10.95.244.82,10.95.246.141,10.95.246.145' -# export IP_LIST='10.95.244.83,10.95.244.82,10.95.246.141,10.95.246.145,10.95.246.162,10.95.247.31,10.95.247.39,10.95.246.158' - -export EP_DECODER_PERF_TEST=True -export USE_CACHE_KV_INT8=True -export MAX_SEQ_LEN=5000 -export MAX_DEC_LEN=64 - -mpirun \ --x EP_DECODER_PERF_TEST -x USE_CACHE_KV_INT8 -x MAX_SEQ_LEN -x MAX_DEC_LEN \ ---host $IP_LIST \ -bash run_prediction_ep_decoder.sh ${1} ${2} ${BATCH_SIZE:-92} ${USE_MICRO_BATCH:-"False"} $IP_LIST diff --git a/scripts/run_prediction_ep_decoder_single_node_perf.sh b/scripts/run_prediction_ep_decoder_single_node_perf.sh deleted file mode 100644 index 269266def..000000000 --- a/scripts/run_prediction_ep_decoder_single_node_perf.sh +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - - -export EP_DECODER_PERF_TEST=True -export USE_CACHE_KV_INT8=True -export MAX_SEQ_LEN=5000 -export MAX_DEC_LEN=64 - -bash run_prediction_ep_decoder.sh ${1} 1 ${BATCH_SIZE:-52} ${USE_MICRO_BATCH:-"False"} diff --git a/scripts/run_prediction_ep_prefill_perf.sh b/scripts/run_prediction_ep_prefill_perf.sh deleted file mode 100644 index 8ff656622..000000000 --- a/scripts/run_prediction_ep_prefill_perf.sh +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -export EP_PREFILL_PERF_TEST=True -export MAX_DEC_LEN=1 -export CKPT_PATH=${1:-$CKPT_PATH} - -bash run_prediction_ep_prefill.sh ${CKPT_PATH} \ No newline at end of file