diff --git a/docs/usage/environment_variables.md b/docs/usage/environment_variables.md
index 17fe91aee..c4c319f83 100644
--- a/docs/usage/environment_variables.md
+++ b/docs/usage/environment_variables.md
@@ -88,9 +88,5 @@ environment_variables: dict[str, Callable[[], Any]] = {
 
     # Count for cache_transfer_manager process error
     "FD_CACHE_PROC_ERROR_COUNT": lambda: int(os.getenv("FD_CACHE_PROC_ERROR_COUNT", "10")),
-   
-    # Max allocated KV cache blocks. Use this to limit how many KV cache blocks the engine is allowed to allocate.
-    # Set to -1 (default) for no limit, or a positive integer to cap the maximum number of blocks that can be allocated.
-    "FD_MAX_KVCACHE_BLOCKS": lambda: int(os.getenv("FD_MAX_KVCACHE_BLOCKS", "-1")),
 }
 ```
diff --git a/docs/zh/usage/environment_variables.md b/docs/zh/usage/environment_variables.md
index ad3cdad62..b0a162a8a 100644
--- a/docs/zh/usage/environment_variables.md
+++ b/docs/zh/usage/environment_variables.md
@@ -88,7 +88,4 @@ environment_variables: dict[str, Callable[[], Any]] = {
 
     # cache_transfer_manager 进程残留时连续错误阈值
     "FD_CACHE_PROC_ERROR_COUNT": lambda: int(os.getenv("FD_CACHE_PROC_ERROR_COUNT", "10")),}
-
-    # KVCache Block块分配值的上限。此变量限制引擎分配的块数上限。当为默认值-1时表示不设限
-    "FD_MAX_KVCACHE_BLOCKS": lambda: int(os.getenv("FD_MAX_KVCACHE_BLOCKS", "-1")),
 ```
diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py
index c74f46205..6d294a0c8 100644
--- a/fastdeploy/envs.py
+++ b/fastdeploy/envs.py
@@ -119,9 +119,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "FD_EP_BATCHED_TOKEN_TIMEOUT": lambda: float(os.getenv("FD_EP_BATCHED_TOKEN_TIMEOUT", "0.1")),
     # Max pre-fetch requests number in PD
     "FD_EP_MAX_PREFETCH_TASK_NUM": lambda: int(os.getenv("FD_EP_MAX_PREFETCH_TASK_NUM", "8")),
-    # Max allocated KV cache blocks. Use this to limit how many KV cache blocks the engine is allowed to allocate.
-    # Set to -1 (default) for no limit, or a positive integer to cap the maximum number of blocks that can be allocated.
-    "FD_MAX_KVCACHE_BLOCKS": lambda: int(os.getenv("FD_MAX_KVCACHE_BLOCKS", "-1")),
     # Enable or disable model caching.
     # When enabled, the quantized model is stored as a cache for future inference to improve loading efficiency.
     "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
diff --git a/fastdeploy/worker/iluvatar_worker.py b/fastdeploy/worker/iluvatar_worker.py
index 6ac65c4b7..625aca86d 100644
--- a/fastdeploy/worker/iluvatar_worker.py
+++ b/fastdeploy/worker/iluvatar_worker.py
@@ -21,7 +21,6 @@ import time
 import numpy as np
 import paddle
 
-from fastdeploy import envs
 from fastdeploy.config import FDConfig
 from fastdeploy.inter_communicator import IPCSignal
 from fastdeploy.utils import get_logger, set_random_seed
@@ -127,10 +126,11 @@ class IluvatarPaddleDisWorkerProc(PaddleDisWorkerProc):
             # 2. Calculate the appropriate number of blocks
             model_block_memory_used = self.worker.cal_theortical_kvcache()
             num_blocks_local = int(available_kv_cache_memory // model_block_memory_used)
-
-            if envs.FD_MAX_KVCACHE_BLOCKS > 0 and num_blocks_local > envs.FD_MAX_KVCACHE_BLOCKS:
-                logger.info(f"------- Reset num_blocks_local {num_blocks_local} to {envs.FD_MAX_KVCACHE_BLOCKS}")
-                num_blocks_local = envs.FD_MAX_KVCACHE_BLOCKS
+            # NOTE(liuzichang): Too many blocks will lead to illegal memory access,
+            # so cap the count at 40000 for now. Dynamic limits will be developed in the future.
+            if num_blocks_local > 40000:
+                logger.info(f"------- Reset num_blocks_local {num_blocks_local} to 40000")
+                num_blocks_local = min(40000, num_blocks_local)
             logger.info(f"------- model_block_memory_used:{model_block_memory_used} --------")
             logger.info(f"------- num_blocks_local:{num_blocks_local} --------")
 
diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py
index 74bf185bd..c3a3b5076 100644
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -530,9 +530,11 @@ class PaddleDisWorkerProc:
             # 2. Calculate the appropriate number of blocks
             model_block_memory_used = self.worker.cal_theortical_kvcache()
             num_blocks_local = int(available_kv_cache_memory // model_block_memory_used)
-            if envs.FD_MAX_KVCACHE_BLOCKS > 0 and num_blocks_local > envs.FD_MAX_KVCACHE_BLOCKS:
-                logger.info(f"------- Reset num_blocks_local {num_blocks_local} to {envs.FD_MAX_KVCACHE_BLOCKS}")
-                num_blocks_local = envs.FD_MAX_KVCACHE_BLOCKS
+            # NOTE(liuzichang): Too many blocks will lead to illegal memory access,
+            # so cap the count at 40000 for now. Dynamic limits will be developed in the future.
+            if num_blocks_local > 40000:
+                logger.info(f"------- Reset num_blocks_local {num_blocks_local} to 40000")
+                num_blocks_local = min(40000, num_blocks_local)
             logger.info(f"------- model_block_memory_used:{model_block_memory_used / 1024**3} GB --------")
             logger.info(f"------- num_blocks_local:{num_blocks_local} --------")
 
diff --git a/tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py b/tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
index 5c28fa67b..686c53779 100644
--- a/tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
+++ b/tests/ci_use/EB_VL_Lite/test_EB_VL_Lite_serving.py
@@ -879,7 +879,7 @@ def test_structured_outputs_grammar(openai_client):
 def test_profile_reset_block_num():
     """测试profile reset_block_num功能，与baseline diff不能超过5%"""
     log_file = "./log/config.log"
-    baseline = 65565
+    baseline = 40000
 
     if not os.path.exists(log_file):
         pytest.fail(f"Log file not found: {log_file}")
diff --git a/tests/e2e/test_EB_VL_Lite_serving.py b/tests/e2e/test_EB_VL_Lite_serving.py
index 4a01f718a..fed152d0e 100644
--- a/tests/e2e/test_EB_VL_Lite_serving.py
+++ b/tests/e2e/test_EB_VL_Lite_serving.py
@@ -636,7 +636,7 @@ def test_chat_with_reasoning_max_tokens(openai_client):
 def test_profile_reset_block_num():
     """测试profile reset_block_num功能，与baseline diff不能超过5%"""
     log_file = "./log/config.log"
-    baseline = 65565
+    baseline = 40000
 
     if not os.path.exists(log_file):
         pytest.fail(f"Log file not found: {log_file}")