mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-12 20:11:20 +08:00
[Sync Code] Update vs branch (#3403)
* Pre ce modified (#3335) (#3360) * Pre ce modified (#3335) * update * update * fix * fix * update * update * update * fix * update * update * update * add ut fix pr(3367) * [Bug Fix] Fix V1 video bug (#3387) * fix stopseq error info (#3342) Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> * [BugFix] Fix default log level of paddleformers (#3377) Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> * [Polish Code] Remove useless notes * feat(log):add_request_and_response_log (#3392) * Optimize CI execution workflow. (#3371) (#3384) * fix * [BugFix] fix control signal release failed (#3374) * [BugFix] * [BugFix] * [BugFix] * [BugFix] * fix * fix --------- Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> --------- Co-authored-by: YUNSHEN XIE <1084314248@qq.com> Co-authored-by: ming1753 <61511741+ming1753@users.noreply.github.com> Co-authored-by: JYChen <zoooo0820@qq.com> Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com> Co-authored-by: xiaolei373 <zley373@gmail.com> Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
This commit is contained in:
@@ -97,13 +97,13 @@ class ResourceManagerV1(ResourceManager):
|
||||
|
||||
def _prepare_preempt_task(self, request):
|
||||
return ScheduledPreemptTask(idx=request.idx, request_id=request.request_id)
|
||||
|
||||
|
||||
def reschedule_preempt_task(self, request_id):
|
||||
with self.lock:
|
||||
if request_id in self.to_be_rescheduled_request_id_set and request_id in self.requests:
|
||||
request = self.requests[request_id]
|
||||
self.waiting.appendleft(request)
|
||||
self.to_be_rescheduled_request_id_set.remove(request_id)
|
||||
self.to_be_rescheduled_request_id_set.remove(request_id)
|
||||
|
||||
def _trigger_preempt(self, request, num_new_blocks, preempted_reqs, scheduled_reqs):
|
||||
can_schedule = True
|
||||
@@ -142,26 +142,31 @@ class ResourceManagerV1(ResourceManager):
|
||||
|
||||
input_ids_lst = request.prompt_token_ids + request.output_token_ids
|
||||
input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
|
||||
grid_thw = []
|
||||
for one in inputs["grid_thw"]:
|
||||
if one[0] == 1:
|
||||
grid_thw.append(one)
|
||||
else:
|
||||
grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2))
|
||||
|
||||
input_ids = paddle.to_tensor(input_ids_lst, dtype="int64")
|
||||
image_patch_id = inputs["image_patch_id"]
|
||||
grid_thw = paddle.to_tensor(grid_thw, dtype="int64")
|
||||
|
||||
if request.multimodal_img_boundaries is None:
|
||||
grid_thw = []
|
||||
for one in inputs["grid_thw"]:
|
||||
if one[0] == 1:
|
||||
grid_thw.append(one)
|
||||
else:
|
||||
grid_thw.extend([[2, one[1], one[2]]] * (one[0] // 2))
|
||||
|
||||
grid_thw = paddle.to_tensor(grid_thw, dtype="int64")
|
||||
from fastdeploy.model_executor.ops.gpu import get_img_boundaries
|
||||
|
||||
request.multimodal_img_boundaries = get_img_boundaries(
|
||||
task_input_ids=input_ids, grid_thw=grid_thw, image_patch_id=image_patch_id
|
||||
).numpy()
|
||||
|
||||
grid_thw = grid_thw.numpy().reshape([-1, 3])
|
||||
inputs["grid_thw"] = grid_thw
|
||||
|
||||
grid_thw = inputs["grid_thw"]
|
||||
img_boundaries_idx = request.multimodal_img_boundaries[0]
|
||||
img_num_per_boundary = request.multimodal_img_boundaries[1]
|
||||
ori_prompt_len = img_boundaries_idx[-1].item()
|
||||
grid_thw = grid_thw.numpy().reshape([-1, 3])
|
||||
pre_end_idx = request.num_computed_tokens
|
||||
new_end_idx = pre_end_idx + num_new_tokens
|
||||
if new_end_idx < ori_prompt_len and input_ids[new_end_idx - 1] == image_patch_id:
|
||||
@@ -421,9 +426,15 @@ class ResourceManagerV1(ResourceManager):
|
||||
self.running.remove(request)
|
||||
request.status = RequestStatus.FINISHED
|
||||
self._free_blocks(request)
|
||||
if request.request_id in self.to_be_rescheduled_request_id_set: # finished after preempted, blocks have been recycled.
|
||||
self.to_be_rescheduled_request_id_set.remove(request.request_id) # just remove from to_be_rescheduled_request_id_set
|
||||
if request in self.waiting: # after finished, this request still scheduled from preempted to waiting, unexpected error, should not be here
|
||||
if (
|
||||
request.request_id in self.to_be_rescheduled_request_id_set
|
||||
): # finished after preempted, blocks have been recycled.
|
||||
self.to_be_rescheduled_request_id_set.remove(
|
||||
request.request_id
|
||||
) # just remove from to_be_rescheduled_request_id_set
|
||||
if (
|
||||
request in self.waiting
|
||||
): # after finished, this request still scheduled from preempted to waiting, unexpected error, should not be here
|
||||
raise RuntimeError(f"request {request.request_id} scheduled into waiting list, after finished")
|
||||
self.tasks_list[request.idx] = None
|
||||
self.stop_flags[request.idx] = True
|
||||
|
Reference in New Issue
Block a user