mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[v1 loader]qwen Offline fp8 (#4036)
* support offline fp8 * update ut * update ut * update ut * fix * update * update
This commit is contained in:
@@ -91,7 +91,7 @@ class ParallelLMHead(nn.Layer):
|
||||
self.linear.weight,
|
||||
{
|
||||
"weight_loader": default_weight_loader(self.fd_config),
|
||||
"model_format": self.fd_config.model_config.model_format,
|
||||
"weight_need_transpose": self.fd_config.model_config.model_format == "torch",
|
||||
},
|
||||
)
|
||||
if self.nranks > 1:
|
||||
@@ -110,7 +110,7 @@ class ParallelLMHead(nn.Layer):
|
||||
self.linear.weight,
|
||||
{
|
||||
"weight_loader": default_weight_loader(self.fd_config),
|
||||
"model_format": self.fd_config.model_config.model_format,
|
||||
"weight_need_transpose": self.fd_config.model_config.model_format == "torch",
|
||||
},
|
||||
)
|
||||
|
||||
|
Reference in New Issue
Block a user