mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
WINT4/WINT8 dense gemm default use Machete (#4451)
This commit is contained in:
@@ -78,7 +78,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
"FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
|
||||
|
||||
# Whether to use Machete for wint4/wint8 dense GEMM (enabled by default).
|
||||
"FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"),
|
||||
"FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "1"),
|
||||
|
||||
# The string inserted to truncate the thinking content while a model is reasoning. (</think> for ernie4_5_vl, \n</think>\n\n for ernie_x1)
|
||||
"FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR": lambda: os.getenv("FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR", "</think>"),
|
||||
|
||||
Reference in New Issue
Block a user