mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Iluvatar GPU] Optimize attention performance and fix moe load ckpt error (#3651)
This commit is contained in:
@@ -1186,9 +1186,7 @@ class CacheConfig:
             self.kv_cache_ratio = 1.0
         else:
             self.kv_cache_ratio = 0.75
-        self.enc_dec_block_num = (
-            0 if current_platform.is_iluvatar() or current_platform.is_maca() else envs.FD_ENC_DEC_BLOCK_NUM
-        )
+        self.enc_dec_block_num = 0 if current_platform.is_maca() else envs.FD_ENC_DEC_BLOCK_NUM
         self.prealloc_dec_block_slot_num_threshold = 12
         self.cache_dtype = "bfloat16"
         self.model_cfg = None
Reference in New Issue
Block a user