[XPU] Fixed the issue of performance degradation caused by enabling ENABLE_V1_KVCACHE_SCHEDULER (#3393)

* fix v1 schedule oom bug * fix v1 schedule oom bug
2025-10-05 16:48:03 +08:00 · 2025-08-14 17:41:40 +08:00
parent 28918702c2
commit 101605869c
4 changed files with 22 additions and 9 deletions
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -15,10 +15,12 @@
 """

 import json
+import os
 from dataclasses import asdict, dataclass
 from dataclasses import fields as dataclass_fields
 from typing import Any, Dict, List, Optional
-import os
+
+import paddle

 from fastdeploy.config import (
    CacheConfig,
@@ -866,10 +868,13 @@ class EngineArgs:
            if self.enable_chunked_prefill:
                self.max_num_batched_tokens = 2048
            else:
-                if not int(os.getenv('ENABLE_V1_KVCACHE_SCHEDULER', '0')):
+                if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
                    self.max_num_batched_tokens = self.max_model_len
                else:
-                    self.max_num_batched_tokens = 8192
+                    if paddle.is_compiled_with_xpu():
+                        self.max_num_batched_tokens = self.max_model_len
+                    else:
+                        self.max_num_batched_tokens = 8192

        all_dict = asdict(self)
        all_dict["model_cfg"] = model_cfg