mirror of
				https://github.com/PaddlePaddle/FastDeploy.git
				synced 2025-10-31 03:46:40 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			228 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			228 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """
 | ||
| # Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
 | ||
| #
 | ||
| # Licensed under the Apache License, Version 2.0 (the "License"
 | ||
| # you may not use this file except in compliance with the License.
 | ||
| # You may obtain a copy of the License at
 | ||
| #
 | ||
| #     http://www.apache.org/licenses/LICENSE-2.0
 | ||
| #
 | ||
| # Unless required by applicable law or agreed to in writing, software
 | ||
| # distributed under the License is distributed on an "AS IS" BASIS,
 | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | ||
| # See the License for the specific language governing permissions and
 | ||
| # limitations under the License.
 | ||
| """
 | ||
| 
 | ||
| 
 | ||
| import requests
 | ||
| import os
 | ||
| from tqdm import tqdm
 | ||
| import argparse
 | ||
| import hashlib
 | ||
| import re
 | ||
| 
 | ||
| 
 | ||
def parse_arguments():
    """Parse the downloader's command-line options.

    Returns:
        argparse.Namespace: parsed options with fields
            model_name (str): model identifier to download
                (default 'deepseek-ai/DeepSeek-R1/weight_only_int4').
            dir (str): directory to save downloads into (default 'downloads').
            nnodes (int): number of nodes taking part in the download (default 1).
            mode (str): 'master' or 'slave'; only meaningful with 2 nodes
                (default 'master').
            speculate_model_path (str | None): path for the speculative
                (MTP) model, or None to skip it.
    """
    parser = argparse.ArgumentParser(description="download models")
    # (flags, keyword-arguments) table keeps every option definition in one place
    option_table = [
        (('-m', '--model_name'),
         dict(default='deepseek-ai/DeepSeek-R1/weight_only_int4',
              help="model_name")),
        (('-d', '--dir'),
         dict(default='downloads', help="save dir")),
        (('-n', '--nnodes'),
         dict(type=int, default=1, help="the number of node")),
        (('-M', '--mode'),
         dict(default="master", choices=["master", "slave"],
              help="only support in 2 nodes model. There are two modes, master or slave.")),
        (('-s', '--speculate_model_path'),
         dict(default=None, help="speculate model path")),
    ]
    for flags, kwargs in option_table:
        parser.add_argument(*flags, **kwargs)
    return parser.parse_args()
 | ||
| 
 | ||
| 
 | ||
def calculate_md5(file_path, chunk_size=8192):
    """Compute the MD5 checksum of a file, reading it in fixed-size chunks.

    Args:
        file_path (str): path of the file to hash.
        chunk_size (int, optional): bytes read per iteration. Defaults to 8192.

    Returns:
        str: hexadecimal MD5 digest of the file contents.
    """
    digest = hashlib.md5()
    with open(file_path, 'rb') as stream:
        # read until EOF; chunked reads keep memory bounded for large files
        while block := stream.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()
 | ||
| 
 | ||
| 
 | ||
def download_file(url, save_path, md5sum):
    """Download *url* to *save_path* with a progress bar.

    If save_path already exists it is kept and the download skipped, unless
    the environment variable MD5_CHECK=1 is set and the file's MD5 digest
    differs from *md5sum*, in which case the stale file is removed and
    re-downloaded.

    Args:
        url (str): source URL.
        save_path (str): destination path on disk.
        md5sum (str): expected MD5 hex digest (only checked when MD5_CHECK=1).

    Returns:
        str | None: save_path on success, None on any failure (the partial
        file, if any, is removed).
    """
    md5_check = int(os.getenv("MD5_CHECK", "0")) == 1
    try:
        with requests.get(url, stream=True) as response:
            response.raise_for_status()
            if os.path.exists(save_path):
                if not md5_check:
                    print(f"{save_path} already exists and md5 check is off, skip this step")
                    return save_path
                current_md5sum = calculate_md5(save_path)
                if md5sum != current_md5sum:
                    os.remove(save_path)
                    print("not complete file! start to download again")
                else:
                    print(f"{save_path} already exists and md5sum matches")
                    return save_path
            # dirname is "" when save_path has no directory component and
            # os.makedirs("") raises FileNotFoundError, so guard it
            parent_dir = os.path.dirname(save_path)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

            file_name = save_path.split('/')[-1]
            total_size = int(response.headers.get('content-length', 0))
            # context manager closes the bar even if the write loop raises
            with tqdm(
                total=total_size,
                unit='iB',
                unit_scale=True,
                desc=f"download {file_name}"
            ) as progress_bar:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1024):
                        if chunk:
                            f.write(chunk)
                            progress_bar.update(len(chunk))

            if total_size != 0 and os.path.getsize(save_path) != total_size:
                raise RuntimeError("not complete")

            return save_path
    except Exception as e:
        # report the cause instead of failing silently, then drop the partial file
        print(f"download {url} failed: {e}")
        if save_path and os.path.exists(save_path):
            os.remove(save_path)
        return None
 | ||
| 
 | ||
| 
 | ||
def download_from_txt(base_url, save_dir, model_name=None):
    """Download every file listed in <base_url>/file_list.txt into *save_dir*.

    Each non-empty line of file_list.txt has the form
    "<relative_path>:<md5sum>"; the md5 is passed to download_file, which
    verifies it only when MD5_CHECK=1.

    Args:
        base_url (str): base URL hosting file_list.txt and the listed files.
        save_dir (str): local directory the files are saved under.
        model_name (str, optional): unused; kept for backward compatibility.

    Returns:
        None

    Raises:
        Exception: if the file list itself cannot be fetched.
    """
    txt_url = base_url + "/file_list.txt"
    print(f"{txt_url}")
    try:
        response = requests.get(txt_url)
        response.raise_for_status()
        files_name = [line.strip() for line in response.text.splitlines() if line.strip()]

        # bail out before parsing entries; the original parsed first
        if not files_name:
            print("No valid files found.")
            return

        print(f"Found {len(files_name)} files")

        for entry in files_name:
            # split from the right so file names may themselves contain ':'
            parts = entry.rsplit(':', 1)
            name, checksum = parts[0], parts[-1]
            cur_url = base_url + f"/{name}"
            path = download_file(cur_url, os.path.join(save_dir, name), checksum)
            if path:
                print(f"[✓] Success: {path}")
            else:
                print(f"[×] Failed: {cur_url}")
    except requests.exceptions.RequestException as e:
        raise Exception(f"Failed to download file list from {txt_url}: {str(e)}")
 | ||
| 
 | ||
| 
 | ||
def main():
    """Entry point: download a static model (and, optionally, an MTP model).

    Parses the command line, validates the model name against the supported
    families (Qwen, Llama, Mixtral, DeepSeek), then downloads the file list
    for the node layout selected by --nnodes/--mode. If
    --speculate_model_path is given, the MTP model is downloaded there too.

    Returns:
        None

    Raises:
        ValueError: if the model name is unsupported, or if nnodes/mode
            form an invalid combination.
    """
    args = parse_arguments()
    print(f"Save Path: {os.path.abspath(args.dir)}")

    # make dir
    path = os.path.join(args.dir, args.model_name)
    os.makedirs(path, exist_ok=True)

    model_name = args.model_name
    env = os.environ
    # Supported model families. ".*" on both sides so the family name may
    # appear anywhere; the original ".+" forms wrongly rejected names that
    # started or ended with the family name.
    supported_patterns = [
        r".*Qwen.*",
        r".*Llama.*",
        r".*Mixtral.*",
        r".*DeepSeek.*",
    ]

    # Check if model_name matches any supported pattern
    if not any(re.match(pattern, model_name) for pattern in supported_patterns):
        # one concatenated message: the original passed three separate
        # arguments, which made str(exception) a tuple repr
        raise ValueError(
            f"{model_name} is not in the supported list. Currently supported models: Qwen, Llama, Mixtral, DeepSeek. "
            "Please check the model name from this document "
            "https://github.com/PaddlePaddle/PaddleNLP/blob/develop/llm/server/docs/static_models.md"
        )
    print(f"Start downloading model: {model_name}")
    # NOTE(review): if the "tag" env var is unset, the URL contains the
    # literal "None" — presumably the caller always exports tag; confirm.
    tag = env.get("tag")
    base_url = f"https://paddlenlp.bj.bcebos.com/models/static/{tag}/{model_name}"

    # choose the remote sub-directory for this node layout
    temp_file = None
    if args.nnodes == 1:
        temp_file = "model"
    elif args.nnodes > 1:
        if args.mode == "master":
            temp_file = "node1"
        elif args.mode == "slave":
            temp_file = "node2"
        else:
            raise ValueError(f"Invalid mode: {args.mode}. Mode must be 'master' or 'slave'.")
    else:
        raise ValueError(f"Invalid nnodes: {args.nnodes}. nnodes must be >= 1.")

    if temp_file:
        model_url = base_url + f"/{temp_file}"
        download_from_txt(model_url, path)
    else:
        print(f"Don't support download the {model_name} in mode {args.mode}")

    if args.speculate_model_path:
        os.makedirs(args.speculate_model_path, exist_ok=True)
        print(f"Start downloading mtp model: {model_name}")
        model_url = base_url + "/mtp"
        download_from_txt(model_url, args.speculate_model_path)


if __name__ == "__main__":
    main()
 | 
