[XPU] Support multimodal (mm) prefix caching on XPU (#5356)

Co-authored-by: ddchenhao66 <dhaochen163.com>
2025-12-24 13:28:13 +08:00 · 2025-12-03 19:07:34 +08:00
parent a4bb3e9960
commit 4e8096bd0d
3 changed files with 184 additions and 51 deletions
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -1233,10 +1233,6 @@ class EngineArgs:
        all_dict = asdict(self)
        model_cfg = ModelConfig(all_dict)

-        # XPU currently disable prefix cache for VL model
-        if current_platform.is_xpu() and (self.enable_mm or model_cfg.enable_mm):
-            self.enable_prefix_caching = False
-
        if not model_cfg.is_unified_ckpt and hasattr(model_cfg, "tensor_parallel_size"):
            self.tensor_parallel_size = model_cfg.tensor_parallel_size