mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[docs] Update environment variables documentation (#3957)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
This commit is contained in:
```diff
@@ -72,7 +72,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "FD_USE_DEEP_GEMM":
     lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))),
+
+    # Whether to enable model cache feature
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
+
 
     # Whether to use Machete for wint4 dense GEMM.
     "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"),
 }
 ```
```
```diff
@@ -72,6 +72,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "FD_USE_DEEP_GEMM":
     lambda: bool(int(os.getenv("FD_USE_DEEP_GEMM", "0"))),
+
+    # 是否启用模型权重缓存功能
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
 
     # 是否使用 Machete 后端的 wint4 GEMM.
     "FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"),
 }
```
```diff
@@ -98,7 +98,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # Whether to use new get_output and save_output method (0 or 1)
     "FD_USE_GET_SAVE_OUTPUT_V1": lambda: bool(int(os.getenv("FD_USE_GET_SAVE_OUTPUT_V1", "0"))),
     # Whether to enable model cache feature
-    "FD_ENABLE_MODEL_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_CACHE", "0"))),
+    "FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
 }
 
 
```
```diff
@@ -79,7 +79,7 @@ def is_weight_cache_enabled(fd_config, weight_cache_path=".cache"):
     weight_cache_context = contextlib.nullcontext()
     weight_cache_dir = None
     enable_cache = False
-    if envs.FD_ENABLE_MODEL_CACHE:
+    if envs.FD_ENABLE_MODEL_LOAD_CACHE:
         model_weight_cache_path = os.path.join(fd_config.model_config.model, weight_cache_path)
         # model_type + quantization + tp_size + ep_size
         weight_cache_key = "_".join(
```
```diff
@@ -132,7 +132,11 @@ def save_model(model_arg_name="model", config_arg_name="fd_config"):
 
         with context:
             result = func(*args, **kwargs)
-        if envs.FD_ENABLE_MODEL_CACHE and weight_cache_dir is not None and not os.path.exists(weight_cache_dir):
+        if (
+            envs.FD_ENABLE_MODEL_LOAD_CACHE
+            and weight_cache_dir is not None
+            and not os.path.exists(weight_cache_dir)
+        ):
             assert fd_config.quant_config is not None and getattr(
                 fd_config.quant_config, "is_checkpoint_bf16", False
             ), "Save cache only for dynamic quantization"
```
```diff
@@ -41,7 +41,7 @@ model_param_map = {
     "quantizations": [
         {
             "quant_type": "wint4",
-            "env": {"FD_ENABLE_MODEL_CACHE": "1"},
+            "env": {"FD_ENABLE_MODEL_LOAD_CACHE": "1"},
         }
     ],
 }
```
|
Reference in New Issue
Block a user