[Precision] Support lm_head layer running in float32 (#3597)

* support lm_head fp32 bf16 fp16
* support lm_head fp32 bf16 fp16
* add doc and check code
* lm_head_fp32 specify lm_head as fp32
* code check
* check doc
2025-10-04 00:06:38 +08:00 · 2025-08-27 11:34:53 +08:00
parent ad319a87cc
commit ce9c0917c5
15 changed files with 99 additions and 60 deletions
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -477,6 +477,7 @@ class LLMEngine:
            "disable_any_whitespace": self.cfg.disable_any_whitespace,
            "disable_custom_all_reduce": self.cfg.parallel_config.disable_custom_all_reduce,
            "enable_logprob": self.cfg.model_config.enable_logprob,
+            "lm_head_fp32": self.cfg.model_config.lm_head_fp32,
        }
        for worker_flag, value in worker_append_flag.items():
            if value: