mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-30 14:22:27 +08:00
[XPU]Fixed the issue of performance degradation caused by enabling ENABLE_V1_KVCACHE_SCHEDULER (#3900)
* fix bug * fix bug * update * udpate * update
This commit is contained in:
@@ -19,6 +19,8 @@ from dataclasses import asdict, dataclass
|
||||
from dataclasses import fields as dataclass_fields
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import paddle
|
||||
|
||||
from fastdeploy import envs
|
||||
from fastdeploy.config import (
|
||||
CacheConfig,
|
||||
@@ -1006,7 +1008,10 @@ class EngineArgs:
|
||||
|
||||
if self.max_num_batched_tokens is None:
|
||||
if int(envs.ENABLE_V1_KVCACHE_SCHEDULER):
|
||||
self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
|
||||
if paddle.is_compiled_with_xpu():
|
||||
self.max_num_batched_tokens = self.max_model_len
|
||||
else:
|
||||
self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
|
||||
else:
|
||||
if self.enable_chunked_prefill:
|
||||
self.max_num_batched_tokens = 2048
|
||||
|
Reference in New Issue
Block a user