[Cherry-Pick][BugFix][CI] fix vl moe (#4867) (#4869)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled

* [CI] update paddlepaddle_gpu==3.2.1 and fix rollout_model test logic

* [Cherry-Pick][BugFix][CI] fix vl moe (#4867)
This commit is contained in:
YuBaoku
2025-11-07 00:03:36 +08:00
committed by GitHub
parent 89934edc10
commit 71bbedaf50
3 changed files with 10 additions and 6 deletions

View File

@@ -72,6 +72,7 @@ class VLMoEMeta:
image_index: paddle.Tensor
token_type_ids: paddle.Tensor
image_token_num: paddle.Tensor
num_image_patch_id: paddle.Tensor
def __str__(self):
return (
@@ -499,11 +500,13 @@ class Ernie4_5_VLModel(nn.Layer):
ids_remove_padding: paddle.Tensor,
) -> VLMoEMeta:
image_mask = ids_remove_padding == self.im_patch_id
image_mask = ids_remove_padding >= self.im_patch_id
token_type_ids = image_mask.cast("int32")
image_token_num = image_mask.sum()
token_num = ids_remove_padding.shape[0]
text_token_num = paddle.maximum((token_num - image_token_num), paddle.ones([], dtype="int64"))
num_image_patch_id = ids_remove_padding == self.im_patch_id
num_image_patch_id = num_image_patch_id.cast("int32").sum()
# The scenario requiring padding is CUDA graph, thus we only need to pad the maximum capture size.
self._cuda_graph_buffers["token_type_ids"][: self.fd_config.graph_opt_config.max_capture_size].fill_(-1)
@@ -517,6 +520,7 @@ class Ernie4_5_VLModel(nn.Layer):
image_index=self._cuda_graph_buffers["image_index"][:token_num],
token_type_ids=self._cuda_graph_buffers["token_type_ids"][:token_num],
image_token_num=self._cuda_graph_buffers["image_token_num"],
num_image_patch_id=num_image_patch_id,
)
def get_input_embeddings(self, ids_remove_padding: paddle.Tensor) -> paddle.Tensor:
@@ -787,7 +791,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM):
input_embeddings = self.get_input_embeddings(
ids_remove_padding=ids_remove_padding,
image_features=image_features,
image_token_num=vl_moe_meta.image_token_num.item(),
image_token_num=vl_moe_meta.num_image_patch_id.item(),
)
self._input_embeddings.copy_(input_embeddings, False)

View File

@@ -238,9 +238,9 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
# base result
base_path = os.getenv("MODEL_PATH")
if base_path:
base_file = os.path.join(base_path, "ernie-4_5-vl-base-tp2")
base_file = os.path.join(base_path, "ernie-4_5-vl-base-tp2-v2.3")
else:
base_file = "ernie-4_5-vl-base-tp2"
base_file = "ernie-4_5-vl-base-tp2-v2.3"
with open(base_file, "r") as f:
content2 = f.read()

View File

@@ -247,9 +247,9 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
# base result
base_path = os.getenv("MODEL_PATH")
if base_path:
base_file = os.path.join(base_path, "ernie-4_5-vl-base-tp2")
base_file = os.path.join(base_path, "ernie-4_5-vl-base-tp2-v2.3")
else:
base_file = "ernie-4_5-vl-base-tp2"
base_file = "ernie-4_5-vl-base-tp2-v2.3"
with open(base_file, "r") as f:
content2 = f.read()