mirror of
				https://github.com/PaddlePaddle/FastDeploy.git
				synced 2025-10-31 11:56:44 +08:00 
			
		
		
		
	 3754a9906d
			
		
	
	3754a9906d
	
	
	
		
			
			* 支持稀疏attn * fix bug * code style * fix moba attn get kv shape * 修复a100编译 * codestyle * code style * code style * code style * fix conflict * 增加单侧 * code style * 增加eblite 加载时间 * fix bug * for ci * for ci * for ci * for ci * 支持mlp block size 128 * 增加小算子单测 * fix 单测 mlp * 将环境变量加入到config里面 * fix rollout config * 修复显存 * add test server * add test server * fix mlp 最后一层使用full attn
		
			
				
	
	
		
			75 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			75 lines
		
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """
 | |
| # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 | |
| #
 | |
| # Licensed under the Apache License, Version 2.0 (the "License");
 | |
| # you may not use this file except in compliance with the License.
 | |
| # You may obtain a copy of the License at
 | |
| #
 | |
| #     http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| """
 | |
| 
 | |
| import traceback
 | |
| 
 | |
| import paddle
 | |
| 
 | |
| from fastdeploy.utils import console_logger as logger
 | |
| 
 | |
| from .base import Platform, _Backend
 | |
| 
 | |
| 
 | |
class CUDAPlatform(Platform):
    """
    Platform implementation for NVIDIA CUDA GPUs.

    Provides a device-availability probe and the mapping from an
    attention-backend enum to the fully qualified backend class path.
    """

    # Paddle device identifier for CUDA places.
    device_name = "gpu"

    @classmethod
    def available(cls) -> bool:
        """
        Check whether at least one CUDA device is visible to Paddle.

        Returns:
            bool: True if `paddle.static.cuda_places()` reports at least
            one device; False otherwise (a warning including the original
            error and traceback is logged).
        """
        try:
            # Explicit check instead of `assert`: asserts are stripped
            # under `python -O`, which would make this probe always
            # succeed regardless of actual device availability.
            if len(paddle.static.cuda_places()) > 0:
                return True
            raise RuntimeError("paddle.static.cuda_places() returned no CUDA devices.")
        except Exception as e:
            logger.warning(
                "You are using GPU version PaddlePaddle, but there is no GPU "
                "detected on your machine. Maybe CUDA devices is not set properly."
                f"\n Original Error is {e}, "
                f"{str(traceback.format_exc())}"
            )
            return False

    @classmethod
    def get_attention_backend_cls(cls, selected_backend: _Backend) -> str:
        """
        Resolve the attention backend class path for CUDA.

        Args:
            selected_backend: A `_Backend` member selecting the attention
                implementation.

        Returns:
            str: Dotted import path of the attention backend class.

        Raises:
            ValueError: If `selected_backend` is not supported on CUDA.
        """
        # Dispatch table: (log label, fully qualified class path).
        # Keeps the supported set and the error message below in sync.
        _BACKENDS = {
            _Backend.NATIVE_ATTN: (
                "NATIVE ATTN",
                "fastdeploy.model_executor.layers.attention.PaddleNativeAttnBackend",
            ),
            _Backend.APPEND_ATTN: (
                "APPEND ATTN",
                "fastdeploy.model_executor.layers.attention.AppendAttentionBackend",
            ),
            _Backend.MLA_ATTN: (
                "MLA ATTN",
                "fastdeploy.model_executor.layers.attention.MLAAttentionBackend",
            ),
            _Backend.FLASH_ATTN: (
                "FLASH ATTN",
                "fastdeploy.model_executor.layers.attention.FlashAttentionBackend",
            ),
            _Backend.MOBA_ATTN: (
                "MOBA ATTN",
                "fastdeploy.model_executor.layers.attention.MobaAttentionBackend",
            ),
        }
        if selected_backend not in _BACKENDS:
            # Original message omitted FLASH_ATTN and MOBA_ATTN even
            # though both are dispatched above; list the real set.
            raise ValueError(
                "Invalid attention backend you specified.\n"
                "Now only support [NATIVE_ATTN, MLA_ATTN, APPEND_ATTN, "
                "FLASH_ATTN, MOBA_ATTN] in cuda place."
            )
        label, backend_cls_path = _BACKENDS[selected_backend]
        logger.info(f"Using {label} backend.")
        return backend_cls_path