[docs] Update environment variables documentation (#3957)

2025-10-05 16:48:03 +08:00 · 2025-09-11 12:17:06 +08:00
parent 2af0f671b1
commit 2650f58740
5 changed files with 15 additions and 4 deletions
--- a/docs/usage/environment_variables.md
+++ b/docs/usage/environment_variables.md
@@ -72,7 +72,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "FD_USE_DEEP_GEMM":
    lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))),

+    # Whether to enable the model weight cache feature
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
+
    # Whether to use Machete for wint4 dense GEMM.
    "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"),
+
 }
 ```
--- a/docs/zh/usage/environment_variables.md
+++ b/docs/zh/usage/environment_variables.md
@@ -72,6 +72,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "FD_USE_DEEP_GEMM":
    lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))),

+    # 是否启用模型权重缓存功能
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
+
    # 是否使用 Machete 后端的 wint4 GEMM.
    "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"),
 }
--- a/fastdeploy/envs.py
+++ b/fastdeploy/envs.py
@@ -98,7 +98,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # Whether to use new get_output and save_output method (0 or 1)
    "FD_USE_GET_SAVE_OUTPUT_V1": lambda: bool(int(os.getenv("FD_USE_GET_SAVE_OUTPUT_V1", "0"))),
    # Whether to enable model cache feature
-    "FD_ENABLE_MODEL_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_CACHE", "0"))),
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
 }


--- a/fastdeploy/model_executor/load_weight_utils.py
+++ b/fastdeploy/model_executor/load_weight_utils.py
@@ -79,7 +79,7 @@ def is_weight_cache_enabled(fd_config, weight_cache_path=".cache"):
    weight_cache_context = contextlib.nullcontext()
    weight_cache_dir = None
    enable_cache = False
-    if envs.FD_ENABLE_MODEL_CACHE:
+    if envs.FD_ENABLE_MODEL_LOAD_CACHE:
        model_weight_cache_path = os.path.join(fd_config.model_config.model, weight_cache_path)
        # model_type + quantization + tp_size + ep_size
        weight_cache_key = "_".join(
@@ -132,7 +132,11 @@ def save_model(model_arg_name="model", config_arg_name="fd_config"):

            with context:
                result = func(*args, **kwargs)
-            if envs.FD_ENABLE_MODEL_CACHE and weight_cache_dir is not None and not os.path.exists(weight_cache_dir):
+            if (
+                envs.FD_ENABLE_MODEL_LOAD_CACHE
+                and weight_cache_dir is not None
+                and not os.path.exists(weight_cache_dir)
+            ):
                assert fd_config.quant_config is not None and getattr(
                    fd_config.quant_config, "is_checkpoint_bf16", False
                ), "Save cache only for dynamic quantization"
--- a/tests/model_loader/test_model_cache.py
+++ b/tests/model_loader/test_model_cache.py
@@ -41,7 +41,7 @@ model_param_map = {
        "quantizations": [
            {
                "quant_type": "wint4",
-                "env": {"FD_ENABLE_MODEL_CACHE": "1"},
+                "env": {"FD_ENABLE_MODEL_LOAD_CACHE": "1"},
            }
        ],
    }