[v1 loader] Qwen offline FP8 (#4036)

* Support offline FP8

* update ut

* update ut

* update ut

* fix

* update

* update
This commit is contained in:
bukejiyu
2025-09-15 13:44:11 +08:00
committed by GitHub
parent b1a5b756a3
commit 29ed617f0f
21 changed files with 440 additions and 138 deletions

View File

@@ -91,7 +91,7 @@ class ParallelLMHead(nn.Layer):
self.linear.weight,
{
"weight_loader": default_weight_loader(self.fd_config),
"model_format": self.fd_config.model_config.model_format,
"weight_need_transpose": self.fd_config.model_config.model_format == "torch",
},
)
if self.nranks > 1:
@@ -110,7 +110,7 @@ class ParallelLMHead(nn.Layer):
self.linear.weight,
{
"weight_loader": default_weight_loader(self.fd_config),
"model_format": self.fd_config.model_config.model_format,
"weight_need_transpose": self.fd_config.model_config.model_format == "torch",
},
)