mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Cherry-Pick][BugFix][CI] fix vl moe(#4867) (#4869)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
* [CI] update paddlepaddle_gpu==3.2.1 and fix rollout_model test logic
* [Cherry-Pick][BugFix][CI] fix vl moe(#4867)
This commit is contained in:
@@ -72,6 +72,7 @@ class VLMoEMeta:
|
||||
image_index: paddle.Tensor
|
||||
token_type_ids: paddle.Tensor
|
||||
image_token_num: paddle.Tensor
|
||||
num_image_patch_id: paddle.Tensor
|
||||
|
||||
def __str__(self):
|
||||
return (
|
||||
@@ -499,11 +500,13 @@ class Ernie4_5_VLModel(nn.Layer):
|
||||
ids_remove_padding: paddle.Tensor,
|
||||
) -> VLMoEMeta:
|
||||
|
||||
image_mask = ids_remove_padding == self.im_patch_id
|
||||
image_mask = ids_remove_padding >= self.im_patch_id
|
||||
token_type_ids = image_mask.cast("int32")
|
||||
image_token_num = image_mask.sum()
|
||||
token_num = ids_remove_padding.shape[0]
|
||||
text_token_num = paddle.maximum((token_num - image_token_num), paddle.ones([], dtype="int64"))
|
||||
num_image_patch_id = ids_remove_padding == self.im_patch_id
|
||||
num_image_patch_id = num_image_patch_id.cast("int32").sum()
|
||||
|
||||
# The scenario requiring padding is CUDA graph, thus we only need to pad the maximum capture size.
|
||||
self._cuda_graph_buffers["token_type_ids"][: self.fd_config.graph_opt_config.max_capture_size].fill_(-1)
|
||||
@@ -517,6 +520,7 @@ class Ernie4_5_VLModel(nn.Layer):
|
||||
image_index=self._cuda_graph_buffers["image_index"][:token_num],
|
||||
token_type_ids=self._cuda_graph_buffers["token_type_ids"][:token_num],
|
||||
image_token_num=self._cuda_graph_buffers["image_token_num"],
|
||||
num_image_patch_id=num_image_patch_id,
|
||||
)
|
||||
|
||||
def get_input_embeddings(self, ids_remove_padding: paddle.Tensor) -> paddle.Tensor:
|
||||
@@ -787,7 +791,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM):
|
||||
input_embeddings = self.get_input_embeddings(
|
||||
ids_remove_padding=ids_remove_padding,
|
||||
image_features=image_features,
|
||||
image_token_num=vl_moe_meta.image_token_num.item(),
|
||||
image_token_num=vl_moe_meta.num_image_patch_id.item(),
|
||||
)
|
||||
self._input_embeddings.copy_(input_embeddings, False)
|
||||
|
||||
|
||||
@@ -238,9 +238,9 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
|
||||
# base result
|
||||
base_path = os.getenv("MODEL_PATH")
|
||||
if base_path:
|
||||
base_file = os.path.join(base_path, "ernie-4_5-vl-base-tp2")
|
||||
base_file = os.path.join(base_path, "ernie-4_5-vl-base-tp2-v2.3")
|
||||
else:
|
||||
base_file = "ernie-4_5-vl-base-tp2"
|
||||
base_file = "ernie-4_5-vl-base-tp2-v2.3"
|
||||
with open(base_file, "r") as f:
|
||||
content2 = f.read()
|
||||
|
||||
|
||||
@@ -247,9 +247,9 @@ def test_consistency_between_runs(api_url, headers, consistent_payload):
|
||||
# base result
|
||||
base_path = os.getenv("MODEL_PATH")
|
||||
if base_path:
|
||||
base_file = os.path.join(base_path, "ernie-4_5-vl-base-tp2")
|
||||
base_file = os.path.join(base_path, "ernie-4_5-vl-base-tp2-v2.3")
|
||||
else:
|
||||
base_file = "ernie-4_5-vl-base-tp2"
|
||||
base_file = "ernie-4_5-vl-base-tp2-v2.3"
|
||||
with open(base_file, "r") as f:
|
||||
content2 = f.read()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user