mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[XPU] XPU currently disables prefix caching for VL models (#4695)
Co-authored-by: ddchenhao66 <dhaochen@163.com>
This commit is contained in:
@@ -1078,6 +1078,10 @@ class EngineArgs:
|
||||
all_dict = asdict(self)
|
||||
model_cfg = ModelConfig(all_dict)
|
||||
|
||||
# XPU currently disables prefix caching for VL (multimodal) models
|
||||
if current_platform.is_xpu() and (self.enable_mm or model_cfg.enable_mm):
|
||||
self.enable_prefix_caching = False
|
||||
|
||||
if not model_cfg.is_unified_ckpt and hasattr(model_cfg, "tensor_parallel_size"):
|
||||
self.tensor_parallel_size = model_cfg.tensor_parallel_size
|
||||
|
||||
|
||||
Reference in New Issue
Block a user