FastDeploy/scripts/vit_model_split.py

"""
# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import argparse
import os

import paddle

parser = argparse.ArgumentParser()
parser.add_argument(
    "--model_path",
    default="./",
    type=str,
    required=True,
    help="The directory of model.",
)
parser.add_argument(
    "--output_path",
    default="./",
    type=str,
    help="The directory of splited model",
)
parser.add_argument("--model_degree", default=4, type=int, help="Input model mp degree.")
args = parser.parse_args()

hidden_size = 1280
kv_num_heads = 16
head_dim = 80

input_model_state_dict = paddle.load(os.path.join(args.model_path, "model_state.pdparams"))

for i in range(args.model_degree):
    static_dict = {}
    for k, v in input_model_state_dict.items():
        if "qkv.weight" in k:
            static_dict[k] = (
                input_model_state_dict[k]
                .reshape([hidden_size, 3, kv_num_heads, head_dim])
                .split(args.model_degree, axis=-2)[i]
                .reshape([hidden_size, -1])
            )
        elif "qkv.bias" in k:
            static_dict[k] = (
                input_model_state_dict[k]
                .reshape([3, kv_num_heads, head_dim])
                .split(args.model_degree, axis=-2)[i]
                .reshape([-1])
            )
        elif "attn.proj.weight" in k:
            static_dict[k] = input_model_state_dict[k].split(args.model_degree, axis=-2)[i]
        elif "fc1.weight" in k:
            static_dict[k] = input_model_state_dict[k].split(args.model_degree, axis=-1)[i]
        elif "fc1.bias" in k:
            static_dict[k] = input_model_state_dict[k].split(args.model_degree, axis=-1)[i]
        elif "fc2.weight" in k:
            static_dict[k] = input_model_state_dict[k].split(args.model_degree, axis=-2)[i]
        else:
            static_dict[k] = v

    paddle.save(
        static_dict,
        os.path.join(args.model_path, f"model_state_tp0{i}.pdparams"),
    )