From b87384aa70cdbb1dbc20580d2f4a92714bb18ab1 Mon Sep 17 00:00:00 2001
From: ddchenhao66 <165133255+ddchenhao66@users.noreply.github.com>
Date: Fri, 31 Oct 2025 10:36:39 +0800
Subject: [PATCH] [XPU] Disable prefix caching for VL models for now (#4695)

Co-authored-by: ddchenhao66 <dhaochen163.com>
---
 fastdeploy/engine/args_utils.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py
index 82a6ddbd2..30dbff6b7 100644
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -1078,6 +1078,10 @@ class EngineArgs:
         all_dict = asdict(self)
         model_cfg = ModelConfig(all_dict)
 
+        # On XPU, prefix caching is currently disabled for VL (multimodal) models.
+        if current_platform.is_xpu() and (self.enable_mm or model_cfg.enable_mm):
+            self.enable_prefix_caching = False
+
         if not model_cfg.is_unified_ckpt and hasattr(model_cfg, "tensor_parallel_size"):
             self.tensor_parallel_size = model_cfg.tensor_parallel_size