diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py index 82a6ddbd2..30dbff6b7 100644 --- a/fastdeploy/engine/args_utils.py +++ b/fastdeploy/engine/args_utils.py @@ -1078,6 +1078,10 @@ class EngineArgs: all_dict = asdict(self) model_cfg = ModelConfig(all_dict) + # XPU currently disables prefix caching for VL models + if current_platform.is_xpu() and (self.enable_mm or model_cfg.enable_mm): + self.enable_prefix_caching = False + if not model_cfg.is_unified_ckpt and hasattr(model_cfg, "tensor_parallel_size"): self.tensor_parallel_size = model_cfg.tensor_parallel_size