mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-27 04:46:16 +08:00
[XPU]Fixed the issue of performance degradation caused by enabling ENABLE_V1_KVCACHE_SCHEDULER (#3900)
* fix bug * fix bug * update * update * update
This commit is contained in:
@@ -1236,7 +1236,10 @@ class FDConfig:

         if self.max_num_batched_tokens is None:
             if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
-                self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
+                if paddle.is_compiled_with_xpu():
+                    self.max_num_batched_tokens = self.max_model_len
+                else:
+                    self.max_num_batched_tokens = 8192  # if set to max_model_len, it's easy to be OOM
             else:
                 if self.cache_config.enable_chunked_prefill:
                     self.max_num_batched_tokens = 2048
Reference in New Issue
Block a user