WINT4/WINT8 dense gemm default use Machete (#4451)

This commit is contained in:
Sunny-bot1
2025-10-23 17:57:59 +08:00
committed by GitHub
parent a240425db9
commit 4ffe41a747
12 changed files with 310 additions and 15 deletions

View File

@@ -78,7 +78,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
"FD_ENABLE_MODEL_LOAD_CACHE": lambda: bool(int(os.getenv("FD_ENABLE_MODEL_LOAD_CACHE", "0"))),
# Whether to use Machete for wint4/wint8 dense GEMM.
"FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "0"),
"FD_USE_MACHETE": lambda: os.getenv("FD_USE_MACHETE", "1"),
# Used to truncate the string inserted during thinking when reasoning in a model. (</think> for ernie4_5_vl, \n</think>\n\n for ernie_x1)
"FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR": lambda: os.getenv("FD_LIMIT_THINKING_CONTENT_TRUNCATE_STR", "</think>"),