[v1 loader] qwen Offline fp8 (#4036)

* support offline fp8
* update ut
* update ut
* update ut
* fix
* update
* update
2025-10-05 16:48:03 +08:00 · 2025-09-15 13:44:11 +08:00
parent b1a5b756a3
commit 29ed617f0f
21 changed files with 440 additions and 138 deletions
--- a/fastdeploy/model_executor/layers/lm_head.py
+++ b/fastdeploy/model_executor/layers/lm_head.py
@@ -91,7 +91,7 @@ class ParallelLMHead(nn.Layer):
                    self.linear.weight,
                    {
                        "weight_loader": default_weight_loader(self.fd_config),
-                        "model_format": self.fd_config.model_config.model_format,
+                        "weight_need_transpose": self.fd_config.model_config.model_format == "torch",
                    },
                )
                if self.nranks > 1:
@@ -110,7 +110,7 @@ class ParallelLMHead(nn.Layer):
                    self.linear.weight,
                    {
                        "weight_loader": default_weight_loader(self.fd_config),
-                        "model_format": self.fd_config.model_config.model_format,
+                        "weight_need_transpose": self.fd_config.model_config.model_format == "torch",
                    },
                )