remove unuseful scripts (#2652)

2025-12-24 13:28:13 +08:00 · 2025-07-01 10:18:25 +08:00
parent 50aa4080c0
commit 85090ed799
9 changed files with 0 additions and 497 deletions
--- a/scripts/convert_ep_state_from_ep8.py
+++ b/scripts/convert_ep_state_from_ep8.py
@@ -1,59 +0,0 @@
-"""
-# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-
-import paddle
-import paddle.distributed as dist
-from glob import glob
-import os
-
-import argparse
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--model_dir", type=str, required=True)
-args = parser.parse_args()
-
-rank = dist.get_rank()
-ep_num = dist.get_world_size()
-print("rank: ", rank)
-# merge tpn -> tp1
-model_dir = args.model_dir
-save_merged_pp_dir = os.path.join(model_dir, "merged_tp1_state_split")
-os.makedirs(save_merged_pp_dir, exist_ok=True)
-
-model_path_pp = glob(os.path.join(model_dir, "shangxianv1_ep_hadamard_quantmodel_to_eval_pp*"))
-for p in model_path_pp:
-    model_path_ep = os.path.join(p, f"model_state.ep0{rank}.pdparams")
-    print(p, model_path_ep)
-
-    state_dicts = paddle.load(model_path_ep, return_numpy=True)
-
-    print("merge ep")
-    print("p: ", p)
-    for k, v in state_dicts.items():
-        v = paddle.to_tensor(v)
-        if "mlp.experts" in k:
-            k_list = k.split(".")
-            export_id = rank * ep_num + int(k_list[5])
-            k_list[5] = str(export_id)
-            k = ".".join(k_list)
-            print(f"key: {k}")
-            save_split_path = os.path.join(save_merged_pp_dir, k)
-            paddle.save(v, save_split_path)
-        elif rank == 0:
-            save_split_path = os.path.join(save_merged_pp_dir, k)
-            paddle.save(paddle.to_tensor(v), save_split_path)
-    print(f"merge {p} end")
-print("merge end")
--- a/scripts/convert_ep_state_from_ep8.sh
+++ b/scripts/convert_ep_state_from_ep8.sh
@@ -1,6 +0,0 @@
-
-export devices=0,1,2,3,4,5,6,7
-
-
-python -m paddle.distributed.launch --gpus ${devices} convert_ep_state_from_ep8.py --model_dir /path/to/model_dir
-
--- a/scripts/convert_ep_state_from_tp8.py
+++ b/scripts/convert_ep_state_from_tp8.py
@@ -1,86 +0,0 @@
-"""
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-
-import paddle
-import paddle.distributed as dist
-import pdb
-from glob import glob
-import os
-import numpy as np
-
-import argparse
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--model_dir", type=str, required=True)
-args = parser.parse_args()
-
-rank = dist.get_rank()
-print("rank: ", rank)
-# merge tpn -> tp1
-model_dir = args.model_dir
-model_path_pp = glob(os.path.join(model_dir, f"pp{rank}"))
-model_path_pp_tp = []
-for p in model_path_pp:
-    model_path_tp = glob(os.path.join(p, "model_state*"))
-    model_path_tp = sorted(model_path_tp)
-    save_merged_pp_path = os.path.join(p, "merged_tp1_state.pdparams")
-    save_merged_pp_dir = os.path.join(model_dir, "merged_tp1_state_split")
-    os.makedirs(save_merged_pp_dir, exist_ok=True)
-    print(p, model_path_tp)
-
-    state_dicts = [paddle.load(path, return_numpy=True) for path in model_path_tp]
-    state = state_dicts[0]
-
-    print("merge tp")
-    print("p: ", p)
-    for k, v in state.items():
-        save_split_path = os.path.join(save_merged_pp_dir, k)
-        state_now = []
-        for i in range(len(state_dicts)):
-            state_now.append(state_dicts[i][k])
-        print("k: ", k, ", v.shape: ", v.shape)
-        if "qkv_proj" in k:
-            """not need prmt"""
-            # qkv not prmt
-            ori_q = [s[:, :1024] for s in state_now]
-            ori_k = [s[:, 1024:1152] for s in state_now]
-            ori_v = [s[:, 1152:] for s in state_now]
-            new_q = np.concatenate(ori_q, axis=1)
-            new_k = np.concatenate(ori_k, axis=1)
-            new_v = np.concatenate(ori_v, axis=1)
-            print(new_q.shape)
-            print(new_k.shape)
-            print(new_v.shape)
-            new_w = np.concatenate([new_q, new_k, new_v], axis=1)
-            # new_w = np.concatenate(state_now, axis=1)
-        elif "o_proj" in k or "down_proj" in k:
-            new_w = np.concatenate(state_now, axis=0)
-        elif "embed_tokens" in k:
-            new_w = np.concatenate(state_now, axis=0)
-        elif "up_gate_proj" in k:
-            dim = state_now[0].shape[1]
-            half_ffn1_1 = [s[:, :(dim // 2)] for s in state_now]
-            half_ffn1_2 = [s[:, (dim // 2):] for s in state_now]
-            new_ffn1_1 = np.concatenate(half_ffn1_1, axis=1)
-            new_ffn1_2 = np.concatenate(half_ffn1_2, axis=1)
-            new_w = np.concatenate([new_ffn1_1, new_ffn1_2], axis=1)
-        elif "lm_head" in k or "mtp_linear_proj" in k:
-            new_w = np.concatenate(state_now, axis=1)
-        else:
-            new_w = v
-        print("merged_shape: ", new_w.shape)
-        paddle.save(paddle.to_tensor(new_w), save_split_path)
-    print("merge end")
--- a/scripts/convert_ep_state_from_tp8.sh
+++ b/scripts/convert_ep_state_from_tp8.sh
@@ -1,3 +0,0 @@
-export devices=0,1,2,3,4,5,6,7
-
-python -m paddle.distributed.launch --gpus ${devices} convert_ep_state_from_tp8.py --model_dir /path/to/model_dir
--- a/scripts/convert_ep_to_safetensor.py
+++ b/scripts/convert_ep_to_safetensor.py
@@ -1,252 +0,0 @@
-"""
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-
-import paddle
-import os
-from paddlenlp.trainer import strtobool
-from efficientllm.models.utils import load_checkpoint
-from efficientllm.inference_args import InferenceArgs
-from paddlenlp.utils.log import logger
-from efficientllm.models.configuration import ErnieBotConfig
-from efficientllm.models.tokenizer import ErnieBotTokenizer
-from safetensors.numpy import save_file as safe_save_file
-from paddlenlp.utils.env import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME
-import shutil
-import argparse
-import importlib
-import json
-from paddlenlp.transformers.model_utils import shard_checkpoint
-
-MODEL_LIB_NAMES = [
-    "efficientllm.models.modeling_ernie_bot",
-]
-
-
-def parse_arguments():
-    """
-    parse_arguments
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--model_name_or_path",
-        default=None,
-        required=True,
-        help="The directory of model.",
-    )
-    parser.add_argument(
-        "--output_dir",
-        default="merged_output",
-        required=True,
-        help="The directory of merged model output.",
-    )
-    parser.add_argument(
-        "--safe_serialization",
-        type=strtobool,
-        default="True",
-        help="Whether merge the model into safetensors format.",
-    )
-    parser.add_argument(
-        "--predict_model_type",
-        type=str,
-        default="",
-        help="Quantization type for the model.",
-    )
-
-    parser.add_argument(
-        "--draft_type",
-        type=str,
-        default=None,
-        choices=["autoregressive", "inference_with_reference", "hydra", "mtp"],
-        help="Quantization type for the model.",
-    )
-
-    parser.add_argument(
-        "--moe_quant_type",
-        default="default",
-        type=str,
-        choices=["weight_only_int4", "weight_only_int8", "w4a8", "fp8", "default"],
-        help="quant type for moe part",
-    )
-
-    parser.add_argument(
-        "--use_ep",
-        type=strtobool,
-        default="True",
-        help="Whether merge the model into safetensors format.",
-    )
-    parser.add_argument("--dtype", type=str, default="bfloat16")
-    return parser.parse_args()
-
-
-def get_model_cls(config):
-    """
-    Get model class from model configuration.
-    """
-    init_class = "ErnieBotFusedModel"
-    for lib_name in MODEL_LIB_NAMES:
-        eb_lib = importlib.import_module(lib_name)
-        if hasattr(eb_lib, init_class):
-            cls = getattr(eb_lib, init_class)
-            return cls
-
-    raise RuntimeError(f"Cannot find model architecture({init_class}) from eb_lib")
-
-
-def save_safetensors(state_dict, args):
-    """
-    save_safetensors
-    """
-    logger.info("Move to numpy.")
-    for k in list(state_dict.keys()):
-        if isinstance(state_dict[k], paddle.Tensor):
-            state_dict[k] = state_dict.pop(k).cpu().numpy()
-
-    logger.info("Save safetensors files.")
-    shards, index = shard_checkpoint(
-        state_dict,
-        max_shard_size="5GB",
-        weights_name=SAFE_WEIGHTS_NAME,
-        shard_format="naive",
-    )
-    for shard_file, shard in shards.items():
-        save_file = os.path.join(args.output_dir, shard_file)
-        logger.info(f"Saving {save_file}")
-        safe_save_file(shard, save_file, metadata={"format": "np"})
-
-    save_index_file = os.path.join(args.output_dir, SAFE_WEIGHTS_INDEX_NAME)
-    with open(save_index_file, "w", encoding="utf-8") as f:
-        content = json.dumps(index, indent=2) + "\n"
-        f.write(content)
-
-
-def quanted_tensor(cls, state_dict, config):
-    """
-    quanted_tensor
-    """
-    name_action_mappings = cls._get_tensor_quantization_mappings(config)
-    state_keys_map = cls._resolve_prefix_keys(
-        name_action_mappings.keys(), state_dict.keys()
-    )
-    for k, v in state_keys_map.items():
-        name_action_mappings[v] = name_action_mappings.pop(k)
-    state_dict_to_save = {}
-    from efficientllm.layers.utils import get_tensor
-    from tqdm import tqdm
-    for key in tqdm(state_dict.keys(), desc="process quantized weights  "):
-        tensor_path = state_dict[key]
-        if key in name_action_mappings:
-            ret = state_dict[key]
-            action = name_action_mappings.pop(key)
-            quanted_weight_tensor, weight_scale_tensor = action(get_tensor(ret))
-            if quanted_weight_tensor._is_initialized():
-                state_dict_to_save[key + ".quant_weight"] = quanted_weight_tensor.cpu()
-            if weight_scale_tensor._is_initialized():
-                state_dict_to_save[key + ".quant_scale"] = weight_scale_tensor.cpu()
-            else:
-                state_dict_to_save[key] = quanted_weight_tensor.cpu()
-        else:
-            state_dict_to_save[key] = get_tensor(tensor_path).cpu()
-
-    if len(name_action_mappings) > 0:
-        for x in name_action_mappings.keys():
-            logger.debug(
-                f"key <{x}> need to merge tensor parallel but we can't find in model state."
-            )
-    return state_dict_to_save
-
-
-def get_quant_type(args):
-    """
-    get_quant_type
-    """
-    quant_type = args.predict_model_type.lower()
-    if quant_type == "default":
-        quant_type = ""
-    moe_quant_type = args.moe_quant_type.lower()
-    if moe_quant_type == "default":
-        moe_quant_type = ""
-    paddle.set_default_dtype(args.dtype)
-    offline_args = InferenceArgs(
-        quant_type=quant_type,
-        num_layers=1,
-        num_attention_heads=1,
-        num_key_value_heads=1,
-        hidden_size=1,
-        ffn_hidden_size=1,
-        mp_rank=1,
-        mp_size=1,
-    )
-    weight_dtype, act_dtype, cachekv_dtype = (
-        offline_args.weight_dtype,
-        offline_args.act_dtype,
-        offline_args.cachekv_dtype,
-    )
-    return weight_dtype, act_dtype, cachekv_dtype, quant_type, moe_quant_type
-
-
-def main():
-    """
-    main
-    """
-    args = parse_arguments()
-    tokenizer = ErnieBotTokenizer.from_pretrained(args.model_name_or_path)
-    config = ErnieBotConfig.from_pretrained(args.model_name_or_path)
-    (
-        config.weight_dtype,
-        config.act_dtype,
-        config.cachekv_dtype,
-        config.quant_type,
-        config.moe_quant_type,
-    ) = get_quant_type(args)
-    config.is_mtp = args.draft_type in ["eagle", "mtp"]
-    config.use_ep = args.use_ep
-    cls = get_model_cls(config)
-    # load
-    state_dict = load_checkpoint(
-        args.model_name_or_path, cls, config, return_numpy=True
-    )
-    import time
-
-    start = time.perf_counter()
-    state_dict_to_save = quanted_tensor(cls=cls, state_dict=state_dict, config=config)
-    end = time.perf_counter()
-    logger.info("Finish Quantize.")
-    logger.info(f"load和量化耗时: {end - start:.6f} 秒")
-
-    logger.info("Begin to save model")
-    os.makedirs(args.output_dir, exist_ok=True)
-    start = time.perf_counter()
-    if not args.safe_serialization:
-        paddle.save(
-            state_dict_to_save,
-            os.path.join(args.output_dir, "model_state.pdparams"),
-        )
-    else:
-        save_safetensors(state_dict_to_save, args)
-
-    config.save_pretrained(args.output_dir)
-    tokenizer.save_pretrained(args.output_dir)
-    if config.moe_quant_type == "w4a8":
-        # cp act_scales.json
-        shutil.copy(args.model_name_or_path + '/act_scales.json', args.output_dir)
-        shutil.copy(args.model_name_or_path + '/weight_scales.json', args.output_dir)
-    end = time.perf_counter()
-    logger.info(f"save耗时: {end - start:.6f} 秒")
-    logger.info("Finish.")
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/run_prediction_ep_decoder_multi_node.sh
+++ b/scripts/run_prediction_ep_decoder_multi_node.sh
@@ -1,22 +0,0 @@
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# export IP_LIST='10.95.244.83,10.95.244.82'
-export IP_LIST='10.95.244.83,10.95.244.82,10.95.246.141,10.95.246.145'
-# export IP_LIST='10.95.244.83,10.95.244.82,10.95.246.141,10.95.246.145,10.95.246.162,10.95.247.31,10.95.247.39,10.95.246.158'
-
-mpirun \
--host $IP_LIST  \
-bash run_prediction_ep_decoder.sh ${1} ${2} ${BATCH_SIZE:-1} ${USE_MICRO_BATCH:-"False"} $IP_LIST
--- a/scripts/run_prediction_ep_decoder_multi_node_perf.sh
+++ b/scripts/run_prediction_ep_decoder_multi_node_perf.sh
@@ -1,28 +0,0 @@
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# export IP_LIST='10.95.244.83,10.95.244.82'
-export IP_LIST='10.95.244.83,10.95.244.82,10.95.246.141,10.95.246.145'
-# export IP_LIST='10.95.244.83,10.95.244.82,10.95.246.141,10.95.246.145,10.95.246.162,10.95.247.31,10.95.247.39,10.95.246.158'
-
-export EP_DECODER_PERF_TEST=True
-export USE_CACHE_KV_INT8=True
-export MAX_SEQ_LEN=5000
-export MAX_DEC_LEN=64
-
-mpirun \
-x EP_DECODER_PERF_TEST -x USE_CACHE_KV_INT8 -x MAX_SEQ_LEN -x MAX_DEC_LEN \
--host $IP_LIST  \
-bash run_prediction_ep_decoder.sh ${1} ${2} ${BATCH_SIZE:-92} ${USE_MICRO_BATCH:-"False"} $IP_LIST
--- a/scripts/run_prediction_ep_decoder_single_node_perf.sh
+++ b/scripts/run_prediction_ep_decoder_single_node_perf.sh
@@ -1,22 +0,0 @@
-# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-
-export EP_DECODER_PERF_TEST=True
-export USE_CACHE_KV_INT8=True
-export MAX_SEQ_LEN=5000
-export MAX_DEC_LEN=64
-
-bash run_prediction_ep_decoder.sh ${1} 1 ${BATCH_SIZE:-52} ${USE_MICRO_BATCH:-"False"}
--- a/scripts/run_prediction_ep_prefill_perf.sh
+++ b/scripts/run_prediction_ep_prefill_perf.sh
@@ -1,19 +0,0 @@
-# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-export EP_PREFILL_PERF_TEST=True
-export MAX_DEC_LEN=1
-export CKPT_PATH=${1:-$CKPT_PATH}
-
-bash run_prediction_ep_prefill.sh ${CKPT_PATH}