[Bug Fix] Fix bug when multimodal inputs contain only text (#3850)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled

This commit is contained in:
ming1753
2025-09-03 19:48:10 +08:00
committed by GitHub
parent 9213a58a06
commit 1432e336d7

View File

@@ -141,6 +141,7 @@ class ResourceManagerV1(ResourceManager):
if not self.config.model_config.enable_mm:
return num_new_tokens
request.with_image = False
inputs = request.multimodal_inputs
if inputs.get("patch_idx", None) is not None and inputs.get("patch_map", None) is not None:
pre_end_idx = request.num_computed_tokens
@@ -184,8 +185,6 @@ class ResourceManagerV1(ResourceManager):
and inputs.get("image_patch_id", None) is not None
and inputs.get("grid_thw", None) is not None
):
request.with_image = False
input_ids_lst = request.prompt_token_ids + request.output_token_ids
input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")