mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 00:06:38 +08:00
fix bf16 and add comments (#4106)
This commit is contained in:
@@ -498,6 +498,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
|
||||
if weight_need_transpose:
|
||||
loaded_weight = get_tensor(loaded_weight)
|
||||
loaded_weight = loaded_weight.transpose([1, 0])
|
||||
# Avoid redundant transpose of fused weights when weight_loader is called iteratively
|
||||
param.weight_need_transpose = False
|
||||
# Loaded weight is already fused on disk.
|
||||
shard_offsets = [
|
||||
@@ -638,6 +639,7 @@ class QKVParallelLinear(ColumnParallelLinear):
|
||||
if weight_need_transpose:
|
||||
loaded_weight = get_tensor(loaded_weight)
|
||||
loaded_weight = loaded_weight.transpose([1, 0])
|
||||
# Avoid redundant transpose of fused weights when weight_loader is called iteratively
|
||||
param.weight_need_transpose = False
|
||||
# Loaded weight is already fused on disk
|
||||
shard_offsets = [
|
||||
|
Reference in New Issue
Block a user