[Precision] Support lm_head layer running in float32 (#3597)

* support lm_head running in fp32 as well as bf16/fp16

* add documentation and code checks

* `lm_head_fp32` specifies that lm_head runs in fp32 (usage sketch below)
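
A usage sketch for the flag, assuming (as the diff below suggests) that `ModelConfig` takes its field overrides through an `args` dict; the exact constructor signature is hypothetical, not confirmed by this diff:

```python
# Hypothetical: request float32 for lm_head while the rest of the model
# keeps its serving dtype (bf16/fp16). The default is False (see diff below).
config = ModelConfig(args={"lm_head_fp32": True})
assert config.lm_head_fp32
```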
Author: chen
Date: 2025-08-27 11:34:53 +08:00
Committed by: GitHub
Parent: ad319a87cc
Commit: ce9c0917c5
15 changed files with 99 additions and 60 deletions


@@ -129,6 +129,7 @@ class ModelConfig:
         self.quantization = None
         self.pad_token_id: int = -1
         self.eos_tokens_lens: int = 2
+        self.lm_head_fp32: bool = False
         self.model_format = "auto"
         for key, value in args.items():
             if hasattr(self, key):
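
For intuition, here is a minimal sketch of what an fp32 lm_head looks like alongside a bf16 model body. It is a PyTorch-style illustration only (FastDeploy itself is Paddle-based), and the class and parameter names are hypothetical rather than the repo's API:

```python
import torch
import torch.nn as nn

class LMHead(nn.Module):
    """Final hidden-state -> vocab-logits projection.

    With fp32=True the weight stays in float32 and incoming bf16/fp16
    activations are upcast before the matmul, so the logits (and the
    softmax that follows) are computed at full precision.
    """

    def __init__(self, hidden_size: int, vocab_size: int, fp32: bool = False):
        super().__init__()
        dtype = torch.float32 if fp32 else torch.bfloat16
        self.proj = nn.Linear(hidden_size, vocab_size, bias=False, dtype=dtype)
        self.fp32 = fp32

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        if self.fp32:
            hidden_states = hidden_states.float()  # upcast low-precision activations
        return self.proj(hidden_states)

# Usage: model body runs in bf16, head in fp32.
head = LMHead(hidden_size=4096, vocab_size=32000, fp32=True)
logits = head(torch.randn(1, 4096, dtype=torch.bfloat16))
assert logits.dtype == torch.float32
```

The trade-off is extra memory and compute for the vocab-size projection in exchange for more numerically stable logits, which is presumably why the feature is gated behind an opt-in flag defaulting to False rather than being enabled unconditionally.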