"""
# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""


import requests
import os
from tqdm import tqdm
import argparse
import hashlib
import re


def parse_arguments():
    """
    Parse the command-line options of the download script.

    Args:
        None. The options are read from ``sys.argv``.

    Returns:
        argparse.Namespace: the parsed options, with these attributes:
            - model_name (str, default='deepseek-ai/DeepSeek-R1/weight_only_int4'): model to download.
            - dir (str, default='downloads'): directory the files are saved under.
            - nnodes (int, default=1): number of nodes.
            - mode (str, default="master"): either "master" or "slave".
            - speculate_model_path (str, default=None): path for the speculative model.
    """
    # (flags, keyword options) for every supported option, in help-output order.
    option_table = (
        (('-m', '--model_name'),
         {'default': 'deepseek-ai/DeepSeek-R1/weight_only_int4', 'help': "model_name"}),
        (('-d', '--dir'),
         {'default': 'downloads', 'help': "save dir"}),
        (('-n', '--nnodes'),
         {'type': int, 'default': 1, 'help': "the number of node"}),
        (('-M', '--mode'),
         {'default': "master", 'choices': ["master", "slave"],
          'help': "only support in 2 nodes model. There are two modes, master or slave."}),
        (('-s', '--speculate_model_path'),
         {'default': None, 'help': "speculate model path"}),
    )

    cli = argparse.ArgumentParser(description="download models")
    for flags, options in option_table:
        cli.add_argument(*flags, **options)
    return cli.parse_args()


def calculate_md5(file_path, chunk_size=8192):
    """
    Compute the MD5 digest of a file.

    Args:
        file_path (str): path of the file to hash.
        chunk_size (int, optional): number of bytes read per iteration. Defaults to 8192.

    Returns:
        str: the MD5 digest of the file content as a hexadecimal string.
    """
    digest = hashlib.md5()
    with open(file_path, 'rb') as stream:
        # Feed the file to the hash piece by piece so it is never fully in memory.
        while True:
            block = stream.read(chunk_size)
            if block == b'':
                break
            digest.update(block)
    return digest.hexdigest()


def download_file(url, save_path, md5sum, timeout=(10, 60), chunk_size=1024 * 1024):
    """
    Download ``url`` to ``save_path`` unless an acceptable local copy already exists.

    When the environment variable ``MD5_CHECK`` is ``1``, an existing file is kept
    only if its MD5 digest equals ``md5sum``; otherwise it is removed and downloaded
    again. When ``MD5_CHECK`` is anything else (default ``0``), an existing file is
    always kept as-is.

    Args:
        url (str): address of the file to download.
        save_path (str): destination path of the file.
        md5sum (str): expected MD5 hex digest, used only when ``MD5_CHECK`` is ``1``.
        timeout (float or tuple, optional): timeout passed to ``requests.get``.
            Defaults to ``(10, 60)``.
        chunk_size (int, optional): number of bytes requested per streamed chunk.
            Defaults to 1 MiB.

    Returns:
        str or None: ``save_path`` on success (or when the existing file is kept),
        ``None`` when the download failed.
    """
    md5_check = int(os.getenv("MD5_CHECK", "0")) == 1
    # Stream into a side file and rename at the end, so a failed or interrupted
    # download never leaves a truncated file under the final name.
    partial_path = save_path + ".part"
    try:
        # Look at the local file BEFORE touching the network: a valid local copy
        # must survive (and be usable) even when the server is unreachable.
        if os.path.exists(save_path):
            if not md5_check:
                print(f"{save_path} already exists and md5 check is off, skip this step")
                return save_path
            if calculate_md5(save_path) == md5sum:
                print(f"{save_path} already exists and md5sum matches")
                return save_path
            os.remove(save_path)
            print("not complete file! start to download again")

        # dirname is "" for a bare file name, and os.makedirs("") raises.
        parent_dir = os.path.dirname(save_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            file_name = os.path.basename(save_path)
            # Both the file and the progress bar are closed even if streaming fails.
            with open(partial_path, 'wb') as f, tqdm(
                total=total_size,
                unit='iB',
                unit_scale=True,
                desc=f"download {file_name}"
            ) as progress_bar:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
                        progress_bar.update(len(chunk))

        # A size of 0 means the server sent no content-length; nothing to compare.
        if total_size != 0 and os.path.getsize(partial_path) != total_size:
            raise RuntimeError("not complete")

        os.replace(partial_path, save_path)
        return save_path
    except Exception as e:
        # Best-effort contract: report the failure and signal it with None.
        print(f"Failed to download {url}: {e}")
        try:
            # Only the partial file is discarded, never a pre-existing save_path.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        except OSError:
            pass
        return None


def download_from_txt(base_url, save_dir, model_name=None, timeout=30):
    """
    Download every file listed in ``<base_url>/file_list.txt``.

    Each non-empty line of the list is expected to look like ``<file name>:<md5>``.
    The text after the last ``:`` is passed to ``download_file`` as the checksum;
    a line without ``:`` is treated as a file name with an empty checksum.

    Args:
        base_url (str): base URL that contains ``file_list.txt`` and the listed files.
        save_dir (str): directory the files are saved under.
        model_name (str, optional): unused; kept for backward compatibility.
        timeout (float or tuple, optional): timeout passed to ``requests.get`` when
            fetching the file list. Defaults to 30 seconds.

    Returns:
        None. The result of every single download is printed.

    Raises:
        Exception: when the file list itself cannot be downloaded.
    """
    txt_url = base_url + "/file_list.txt"
    print(txt_url)

    # Only fetching the list can raise a RequestException, so keep the try narrow.
    try:
        with requests.get(txt_url, timeout=timeout) as response:
            response.raise_for_status()
            listing = response.text
    except requests.exceptions.RequestException as e:
        raise Exception(f"Failed to download file list from {txt_url}: {str(e)}") from e

    entries = []
    for line in listing.splitlines():
        line = line.strip()
        if not line:
            continue
        name, separator, checksum = line.rpartition(':')
        if not separator:
            # No checksum on this line: do not pass the file name off as an MD5.
            name, checksum = line, ""
        entries.append((name.strip(), checksum.strip()))

    if not entries:
        print("No valid files found.")
        return

    print(f"Found {len(entries)} files")

    for name, checksum in entries:
        cur_url = base_url + f"/{name}"
        path = download_file(cur_url, os.path.join(save_dir, name), checksum)
        if path:
            print(f"[✓] Success: {path}")
        else:
            print(f"[×] Failed: {cur_url}")

def main():
    """
    Entry point: download a static model and, optionally, its speculative (mtp) model.

    The download URL is built from the ``tag`` environment variable and the model
    name given on the command line. All inputs are validated before any directory
    is created.

    Args:
        None. Options come from ``parse_arguments()`` and the ``tag`` environment variable.

    Returns:
        None.

    Raises:
        ValueError: if the model name matches none of the supported patterns, if
            ``nnodes`` is smaller than 1, if the mode is unknown, or if the ``tag``
            environment variable is not set.
    """
    args = parse_arguments()
    print(f"Save Path: {os.path.abspath(args.dir)}")

    model_name = args.model_name
    # Define supported model patterns
    supported_patterns = [
        r".*Qwen.*",
        r".+Llama.+",
        r".+Mixtral.+",
        r".+DeepSeek.+",
    ]

    # Check if model_name matches any supported pattern
    if not any(re.match(pattern, model_name) for pattern in supported_patterns):
        # One single string: several positional arguments would make the message
        # render as a tuple.
        raise ValueError(
            f"{model_name} is not in the supported list. Currently supported models: Qwen, Llama, Mixtral, DeepSeek. "
            "Please check the model name from this document "
            "https://github.com/PaddlePaddle/PaddleNLP/blob/develop/llm/server/docs/static_models.md"
        )

    # Pick the remote sub-directory that holds this node's files.
    if args.nnodes < 1:
        raise ValueError(f"Invalid nnodes: {args.nnodes}. nnodes must be >= 1.")
    if args.nnodes == 1:
        remote_subdir = "model"
    else:
        node_dirs = {"master": "node1", "slave": "node2"}
        if args.mode not in node_dirs:
            raise ValueError(f"Invalid mode: {args.mode}. Mode must be 'master' or 'slave'.")
        remote_subdir = node_dirs[args.mode]

    # Without the tag the URL would silently contain the text "None".
    tag = os.environ.get("tag")
    if not tag:
        raise ValueError("Environment variable 'tag' is not set; it is required to build the download URL.")

    # make dir (only once every input has been validated)
    path = os.path.join(args.dir, args.model_name)
    os.makedirs(path, exist_ok=True)

    print(f"Start downloading model: {model_name}")
    base_url = f"https://paddlenlp.bj.bcebos.com/models/static/{tag}/{model_name}"
    download_from_txt(base_url + f"/{remote_subdir}", path)

    if args.speculate_model_path:
        os.makedirs(args.speculate_model_path, exist_ok=True)
        print(f"Start downloading mtp model: {model_name}")
        download_from_txt(base_url + "/mtp", args.speculate_model_path)

# Run the downloader only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()