[Sync Code] Update vs branch (#3403)

* Pre ce modified (#3335) (#3360) * Pre ce modified (#3335) * update * update * fix * fix * update * update * update * fix * update * update * update * add ut fix pr(3367) * [Bug Fix] Fix V1 video bug (#3387) * fix stopseq error info (#3342) Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> * [BugFix] Fix default log level of paddleformers (#3377) Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> * [Polish Code] Remove useless notes * feat(log):add_request_and_response_log (#3392) * Optimize CI execution workflow. (#3371) (#3384) * fix * [BugFix] fix control signal release failed (#3374) * [BugFix] * [BugFix] * [BugFix] * [BugFix] * fix * fix --------- Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> --------- Co-authored-by: YUNSHEN XIE <1084314248@qq.com> Co-authored-by: ming1753 <61511741+ming1753@users.noreply.github.com> Co-authored-by: JYChen <zoooo0820@qq.com> Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com> Co-authored-by: xiaolei373 <zley373@gmail.com> Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
2025-10-12 20:11:20 +08:00 · 2025-08-14 17:14:45 +08:00
parent 81092c0fe3
commit e11331927f
16 changed files with 329 additions and 155 deletions
--- a/fastdeploy/engine/sched/resource_manager_v1.py
+++ b/fastdeploy/engine/sched/resource_manager_v1.py
@@ -97,13 +97,13 @@ class ResourceManagerV1(ResourceManager):

    def _prepare_preempt_task(self, request):
        return ScheduledPreemptTask(idx=request.idx, request_id=request.request_id)
-    
+
    def reschedule_preempt_task(self, request_id):
        with self.lock:
            if request_id in self.to_be_rescheduled_request_id_set and request_id in self.requests:
                request = self.requests[request_id]
                self.waiting.appendleft(request)
-                self.to_be_rescheduled_request_id_set.remove(request_id) 
+                self.to_be_rescheduled_request_id_set.remove(request_id)

    def _trigger_preempt(self, request, num_new_blocks, preempted_reqs, scheduled_reqs):
        can_schedule = True
@@ -142,26 +142,31 @@ class ResourceManagerV1(ResourceManager):

        input_ids_lst = request.prompt_token_ids + request.output_token_ids
        input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
-        grid_thw = []
-        for one in inputs["grid_thw"]:
-            if one[0] == 1:
-                grid_thw.append(one)
-            else:
-                grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2))
-
+        input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
        image_patch_id = inputs["image_patch_id"]
-        grid_thw = paddle.to_tensor(grid_thw, dtype="int64")
+
        if request.multimodal_img_boundaries is None:
+            grid_thw = []
+            for one in inputs["grid_thw"]:
+                if one[0] == 1:
+                    grid_thw.append(one)
+                else:
+                    grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2))
+
+            grid_thw = paddle.to_tensor(grid_thw, dtype="int64")
            from fastdeploy.model_executor.ops.gpu import get_img_boundaries

            request.multimodal_img_boundaries = get_img_boundaries(
                task_input_ids=input_ids, grid_thw=grid_thw, image_patch_id=image_patch_id
            ).numpy()

+            grid_thw = grid_thw.numpy().reshape([-1, 3])
+            inputs["grid_thw"] = grid_thw
+
+        grid_thw = inputs["grid_thw"]
        img_boundaries_idx = request.multimodal_img_boundaries[0]
        img_num_per_boundary = request.multimodal_img_boundaries[1]
        ori_prompt_len = img_boundaries_idx[-1].item()
-        grid_thw = grid_thw.numpy().reshape([-1, 3])
        pre_end_idx = request.num_computed_tokens
        new_end_idx = pre_end_idx + num_new_tokens
        if new_end_idx < ori_prompt_len and input_ids[new_end_idx - 1] == image_patch_id:
@@ -421,9 +426,15 @@ class ResourceManagerV1(ResourceManager):
                        self.running.remove(request)
                        request.status = RequestStatus.FINISHED
                        self._free_blocks(request)
-                    if request.request_id in self.to_be_rescheduled_request_id_set: # finished after preempted, blocks have been recycled.
-                        self.to_be_rescheduled_request_id_set.remove(request.request_id) # just remove from to_be_rescheduled_request_id_set
-                    if request in self.waiting:  # after finished, this request still scheduled from preempted to waiting, unexpected error, should not be here
+                    if (
+                        request.request_id in self.to_be_rescheduled_request_id_set
+                    ):  # finished after preempted, blocks have been recycled.
+                        self.to_be_rescheduled_request_id_set.remove(
+                            request.request_id
+                        )  # just remove from to_be_rescheduled_request_id_set
+                    if (
+                        request in self.waiting
+                    ):  # after finished, this request still scheduled from preempted to waiting, unexpected error, should not be here
                        raise RuntimeError(f"request {request.request_id} scheduled into waiting list, after finished")
                    self.tasks_list[request.idx] = None
                    self.stop_flags[request.idx] = True