Revert "[Optim] Remove limitation of number of kvcache blocks (#5612)" (#5702)

This reverts commit 9da89a374b.
2025-12-24 13:28:13 +08:00 · 2025-12-23 15:41:33 +08:00
parent 0bef9b684f
commit c1aa66df02
7 changed files with 12 additions and 20 deletions
--- a/docs/usage/environment_variables.md
+++ b/docs/usage/environment_variables.md
@@ -88,9 +88,5 @@ environment_variables: dict[str, Callable[[], Any]] = {

    # Count for cache_transfer_manager process error
    "FD_CACHE_PROC_ERROR_COUNT": lambda: int(os.getenv("FD_CACHE_PROC_ERROR_COUNT", "10")),
-   
-    # Max allocated KV cache blocks. Use this to limit how many KV cache blocks the engine is allowed to allocate.
-    # Set to -1 (default) for no limit, or a positive integer to cap the maximum number of blocks that can be allocated.
-    "FD_MAX_KVCACHE_BLOCKS": lambda: int(os.getenv("FD_MAX_KVCACHE_BLOCKS", "-1")),
 }
 ```
--- a/docs/zh/usage/environment_variables.md
+++ b/docs/zh/usage/environment_variables.md
@@ -88,7 +88,4 @@ environment_variables: dict[str, Callable[[], Any]] = {

    # cache_transfer_manager 进程残留时连续错误阈值
    "FD_CACHE_PROC_ERROR_COUNT": lambda: int(os.getenv("FD_CACHE_PROC_ERROR_COUNT", "10")),}
-
-    # KVCache Block块分配值的上限。此变量限制引擎分配的块数上限。当为默认值-1时表示不设限
-    "FD_MAX_KVCACHE_BLOCKS": lambda: int(os.getenv("FD_MAX_KVCACHE_BLOCKS", "-1")),
 ```