[Feature] Support disabling the prefix cache for mm models (#4502)

* support mm prefix cache close

* add

* fix

* fix

* fix

---------

Co-authored-by: ltd0924 <luotingdan@baidu.com>
This commit is contained in:
ltd0924
2025-10-21 09:56:47 +08:00
committed by GitHub
parent 9558912475
commit 3cd9d3060a
3 changed files with 41 additions and 0 deletions

View File

@@ -20,6 +20,17 @@ from fastdeploy.utils import get_logger
# Module-level logger for this file, created through get_logger.
# NOTE(review): the second argument looks like the log file name - confirm
# against get_logger's signature.
logger = get_logger("prefix_cache_manager", "prefix_cache_manager.log")
# Model architecture names looked up by is_mm_model_disable_prefix_cache().
# Going by the constant's name, these are the "mm" model architectures for
# which the prefix cache is disabled; add new architecture names here.
DISABLE_PREFIX_CACHE_MM_MODEL: set[str] = {"Ernie5ForCausalLM"}
def is_mm_model_disable_prefix_cache(model_arch):
    """
    Report whether ``model_arch`` is listed in DISABLE_PREFIX_CACHE_MM_MODEL.

    Args:
        model_arch: Architecture name to look up (for example
            "Ernie5ForCausalLM"). It must be hashable, because the lookup
            is a set membership test.

    Returns:
        True when ``model_arch`` is a member of
        DISABLE_PREFIX_CACHE_MM_MODEL, False otherwise.
    """
    listed = model_arch in DISABLE_PREFIX_CACHE_MM_MODEL
    return listed
class CacheStatus(Enum):
"""