[XPU] Fix performance degradation caused by enabling ENABLE_V1_KVCACHE_SCHEDULER (#3900)

* fix bug * fix bug * update * update * update
2025-09-30 14:22:27 +08:00 · 2025-09-05 19:17:25 +08:00
parent 473cde779f
commit 77c1bd0813
4 changed files with 20 additions and 4 deletions
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -19,6 +19,8 @@ from dataclasses import asdict, dataclass
 from dataclasses import fields as dataclass_fields
 from typing import Any, Dict, List, Optional

+import paddle
+
 from fastdeploy import envs
 from fastdeploy.config import (
    CacheConfig,
@@ -1006,7 +1008,10 @@ class EngineArgs:

        if self.max_num_batched_tokens is None:
            if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
-                self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
+                if paddle.is_compiled_with_xpu():
+                    self.max_num_batched_tokens = self.max_model_len
+                else:
+                    self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
            else:
                if self.enable_chunked_prefill:
                    self.max_num_batched_tokens = 2048