[Cherry-Pick][Loader][BugFix] Fix some parameters placed on CPU in PaddleOCR-VL (#5413) (#5414)

* [BugFix] Fix some parameters placed on CPU in PaddleOCR-VL

* clean up logs

* fix code style
Author: Nyakku Shigure
Date: 2025-12-08 10:01:20 +08:00 (committed by GitHub)
Parent: 707d1a1fc9
Commit: 7926add37c
2 changed files with 11 additions and 3 deletions
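
Background on the fix: the weight loaders below previously wrote checkpoint tensors into parameters with param.copy_(loaded_weight, False). When loaded_weight is still host-resident, that path can leave the parameter placed on CPU, which is the bug named in the title. The change routes every copy through fastdeploy.model_executor.utils.h2d_copy instead. The helper's implementation is not part of this diff; the sketch below only illustrates the presumed behavior, and h2d_copy_sketch is a hypothetical name.

    import paddle

    def h2d_copy_sketch(param: paddle.Tensor, loaded_weight: paddle.Tensor) -> None:
        # Stage the (possibly CPU-resident) checkpoint tensor on the parameter's
        # own device, then copy in place so the parameter keeps its placement.
        staged = loaded_weight.to(param.place)
        param.copy_(staged, False)  # non-blocking copy between same-place tensors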


@@ -20,6 +20,8 @@ from typing import Optional
 import paddle
 import paddle.nn as nn
 
+from fastdeploy.model_executor.utils import h2d_copy
+
 
 class GELUActivation(nn.Layer):
     """
@@ -97,6 +99,8 @@ class Projector(nn.Layer):
 
     def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
         loaded_weight = loaded_weight.transpose([1, 0])
+        if not param._is_initialized():
+            param.initialize()
         assert param.shape == loaded_weight.shape, (
             f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
         )
@@ -106,4 +110,4 @@ class Projector(nn.Layer):
             loaded_weight = loaded_weight.view(param.dtype)
         else:
             loaded_weight = loaded_weight.cast(param.dtype)
-        param.copy_(loaded_weight, False)
+        h2d_copy(param, loaded_weight)
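
Each loader also gains an initialization guard ahead of the shape assert. A lazily-declared parameter carries shape and dtype metadata but no allocated storage, so asserting on it and copying into it require materializing the buffer first; param.initialize() allocates it on the parameter's configured place. A minimal sketch of the guard, assuming only the private _is_initialized/initialize hooks the diff itself uses:

    import paddle

    def ensure_materialized(param: paddle.Tensor) -> paddle.Tensor:
        # A lazily-created parameter has metadata only; initialize() allocates
        # its buffer on the parameter's configured place so a subsequent
        # h2d_copy has a real, device-resident destination.
        if not param._is_initialized():
            param.initialize()
        return param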


@@ -100,6 +100,8 @@ class SiglipAttention(nn.Layer):
 
     def out_proj_weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
         loaded_weight = loaded_weight.transpose([1, 0])
+        if not param._is_initialized():
+            param.initialize()
         assert param.shape == loaded_weight.shape, (
             f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
         )
@@ -109,7 +111,7 @@ class SiglipAttention(nn.Layer):
             loaded_weight = loaded_weight.view(param.dtype)
         else:
             loaded_weight = loaded_weight.cast(param.dtype)
-        param.copy_(loaded_weight, False)
+        h2d_copy(param, loaded_weight)
 
     def forward(
         self,
@@ -287,6 +289,8 @@ class SiglipMLP(nn.Layer):
 
     def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
         loaded_weight = loaded_weight.transpose([1, 0])
+        if not param._is_initialized():
+            param.initialize()
         assert param.shape == loaded_weight.shape, (
             f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
         )
@@ -296,7 +300,7 @@ class SiglipMLP(nn.Layer):
             loaded_weight = loaded_weight.view(param.dtype)
         else:
             loaded_weight = loaded_weight.cast(param.dtype)
-        param.copy_(loaded_weight, False)
+        h2d_copy(param, loaded_weight)
 
     def forward(self, hidden_states: paddle.Tensor) -> paddle.Tensor:
         hidden_states = self.fc1(hidden_states)
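
For context, a loader like SiglipMLP.weight_loader is invoked once per parameter while the checkpoint is read. The driver below is an illustrative assumption rather than FastDeploy's actual loading loop; the weight_loader attribute lookup mirrors the per-parameter convention the methods above are written for.

    import paddle

    def load_state_dict(model: paddle.nn.Layer, state_dict: dict) -> None:
        # Hypothetical driver: hand each checkpoint tensor to the parameter's
        # custom weight_loader when one is attached, else do a plain device copy.
        params = dict(model.named_parameters())
        for name, loaded_weight in state_dict.items():
            param = params[name]
            loader = getattr(param, "weight_loader", None)
            if loader is not None:
                loader(param, loaded_weight)
            else:
                param.copy_(loaded_weight.to(param.place), False)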