From 1432e336d72c8099acb90c3fa23f4285027bc09f Mon Sep 17 00:00:00 2001 From: ming1753 <61511741+ming1753@users.noreply.github.com> Date: Wed, 3 Sep 2025 19:48:10 +0800 Subject: [PATCH] [Bug Fix] Fix bug of multimodal inputs only text (#3850) --- fastdeploy/engine/sched/resource_manager_v1.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index d753ae6aa..a7f425858 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -141,6 +141,7 @@ class ResourceManagerV1(ResourceManager): if not self.config.model_config.enable_mm: return num_new_tokens + request.with_image = False inputs = request.multimodal_inputs if inputs.get("patch_idx", None) is not None and inputs.get("patch_map", None) is not None: pre_end_idx = request.num_computed_tokens @@ -184,8 +185,6 @@ class ResourceManagerV1(ResourceManager): and inputs.get("image_patch_id", None) is not None and inputs.get("grid_thw", None) is not None ): - request.with_image = False - input_ids_lst = request.prompt_token_ids + request.output_token_ids input_ids = paddle.to_tensor(input_ids_lst, dtype="int64") input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")