[Precision] Support lm_head layer running in float32 (#3597)

* support lm_head fp32 bf16 fp16 * support lm_head fp32 bf16 fp16 * add doc and check code * lm_head_fp32 specify lm_head as fp32 * code check * check doc
2025-10-05 08:37:06 +08:00 · 2025-08-27 11:34:53 +08:00
parent ad319a87cc
commit ce9c0917c5
15 changed files with 99 additions and 60 deletions
--- a/fastdeploy/model_executor/utils.py
+++ b/fastdeploy/model_executor/utils.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 """

+from contextlib import contextmanager
 from typing import Any, Optional, Union

 import paddle
@@ -185,3 +186,15 @@ def default_weight_loader(fd_config: FDConfig) -> None:
        param.copy_(loaded_weight, False)

    return fn
+
+
+@contextmanager
+def temporary_dtype(dtype: str):
+    """Temporarily set Paddle default dtype"""
+    orig_dtype = paddle.get_default_dtype()
+    try:
+        if dtype is not None and dtype == "float32":
+            paddle.set_default_dtype(dtype)
+        yield
+    finally:
+        paddle.set_default_dtype(orig_dtype)