[XPU] Fix performance degradation caused by enabling ENABLE_V1_KVCACHE_SCHEDULER (#3900)

* fix bug

* fix bug

* update

* update

* update
This commit is contained in:
yinwei
2025-09-05 19:17:25 +08:00
committed by GitHub
parent 473cde779f
commit 77c1bd0813
4 changed files with 20 additions and 4 deletions

View File

@@ -1236,7 +1236,10 @@ class FDConfig:
if self.max_num_batched_tokens is None:
if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
if paddle.is_compiled_with_xpu():
self.max_num_batched_tokens = self.max_model_len
else:
self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
else:
if self.cache_config.enable_chunked_prefill:
self.max_num_batched_tokens = 2048