From a375378cc1edf452dcf73c6e72dc8fb834e7dc16 Mon Sep 17 00:00:00 2001 From: ming1753 <61511741+ming1753@users.noreply.github.com> Date: Thu, 14 Aug 2025 09:49:22 +0800 Subject: [PATCH] [Bug Fix] Fix V1 video bug (#3387) --- .../engine/sched/resource_manager_v1.py | 39 ++++++++++++------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index d1116980c..4aecabcd5 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -97,13 +97,13 @@ class ResourceManagerV1(ResourceManager): def _prepare_preempt_task(self, request): return ScheduledPreemptTask(idx=request.idx, request_id=request.request_id) - + def reschedule_preempt_task(self, request_id): with self.lock: if request_id in self.to_be_rescheduled_request_id_set and request_id in self.requests: request = self.requests[request_id] self.waiting.appendleft(request) - self.to_be_rescheduled_request_id_set.remove(request_id) + self.to_be_rescheduled_request_id_set.remove(request_id) def _trigger_preempt(self, request, num_new_blocks, preempted_reqs, scheduled_reqs): can_schedule = True @@ -142,26 +142,31 @@ class ResourceManagerV1(ResourceManager): input_ids_lst = request.prompt_token_ids + request.output_token_ids input_ids = paddle.to_tensor(input_ids_lst, dtype="int64") - grid_thw = [] - for one in inputs["grid_thw"]: - if one[0] == 1: - grid_thw.append(one) - else: - grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2)) - + input_ids = paddle.to_tensor(input_ids_lst, dtype="int64") image_patch_id = inputs["image_patch_id"] - grid_thw = paddle.to_tensor(grid_thw, dtype="int64") + if request.multimodal_img_boundaries is None: + grid_thw = [] + for one in inputs["grid_thw"]: + if one[0] == 1: + grid_thw.append(one) + else: + grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2)) + + grid_thw = paddle.to_tensor(grid_thw, dtype="int64") from fastdeploy.model_executor.ops.gpu import get_img_boundaries request.multimodal_img_boundaries = get_img_boundaries( task_input_ids=input_ids, grid_thw=grid_thw, image_patch_id=image_patch_id ).numpy() + grid_thw = grid_thw.numpy().reshape([-1, 3]) + inputs["grid_thw"] = grid_thw + + grid_thw = inputs["grid_thw"] img_boundaries_idx = request.multimodal_img_boundaries[0] img_num_per_boundary = request.multimodal_img_boundaries[1] ori_prompt_len = img_boundaries_idx[-1].item() - grid_thw = grid_thw.numpy().reshape([-1, 3]) pre_end_idx = request.num_computed_tokens new_end_idx = pre_end_idx + num_new_tokens if new_end_idx < ori_prompt_len and input_ids[new_end_idx - 1] == image_patch_id: @@ -421,9 +426,15 @@ class ResourceManagerV1(ResourceManager): self.running.remove(request) request.status = RequestStatus.FINISHED self._free_blocks(request) - if request.request_id in self.to_be_rescheduled_request_id_set: # finished after preempted, blocks have been recycled. - self.to_be_rescheduled_request_id_set.remove(request.request_id) # just remove from to_be_rescheduled_request_id_set - if request in self.waiting: # after finished, this request still scheduled from preempted to waiting, unexpected error, should not be here + if ( + request.request_id in self.to_be_rescheduled_request_id_set + ): # finished after preempted, blocks have been recycled. + self.to_be_rescheduled_request_id_set.remove( + request.request_id + ) # just remove from to_be_rescheduled_request_id_set + if ( + request in self.waiting + ): # after finished, this request still scheduled from preempted to waiting, unexpected error, should not be here raise RuntimeError(f"request {request.request_id} scheduled into waiting list, after finished") self.tasks_list[request.idx] = None self.stop_flags[request.idx] = True