From af715db763abc65cb2cca1e9f93f9b6572acde19 Mon Sep 17 00:00:00 2001 From: yangjianfengo1 <125249383+yangjianfengo1@users.noreply.github.com> Date: Thu, 20 Nov 2025 16:29:13 +0800 Subject: [PATCH] [Scheduler] Support chunk prefill for video input (#5107) * add video chunk prefill * add vit_merge=True for test_tokenizer_client.py --------- Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> --- fastdeploy/engine/sched/resource_manager_v1.py | 10 +++++++++- fastdeploy/input/tokenzier_client.py | 1 + tests/input/test_tokenizer_client.py | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fastdeploy/engine/sched/resource_manager_v1.py b/fastdeploy/engine/sched/resource_manager_v1.py index b74c772d3..2d6641ed9 100644 --- a/fastdeploy/engine/sched/resource_manager_v1.py +++ b/fastdeploy/engine/sched/resource_manager_v1.py @@ -40,6 +40,7 @@ from fastdeploy.engine.request import ( RequestType, ) from fastdeploy.engine.resource_manager import ResourceManager +from fastdeploy.input.utils import IDS_TYPE_FLAG from fastdeploy.inter_communicator import IPCSignal from fastdeploy.metrics.metrics import main_process_metrics from fastdeploy.multimodal.hasher import MultimodalHasher @@ -391,8 +392,15 @@ class ResourceManagerV1(ResourceManager): end_patch_idx -= 1 end_patch_map = inputs["patch_map"][end_patch_idx] end_modal_id = end_patch_map["modal_id"] - if end_modal_id > 0: + if end_modal_id > 0 and end_modal_id != IDS_TYPE_FLAG["video"]: new_end_idx = end_patch_map["end_idx"] # 当前模态结束位置 + + if end_modal_id == IDS_TYPE_FLAG["video"] and "can_split_idx_list" in inputs: + can_split_idx_list = inputs["can_split_idx_list"] + for i in range(len(can_split_idx_list)): + if can_split_idx_list[i] >= new_end_idx: + new_end_idx = can_split_idx_list[i] + break num_new_tokens = new_end_idx - pre_end_idx request.image_end = end_patch_map["image_num"] diff --git a/fastdeploy/input/tokenzier_client.py b/fastdeploy/input/tokenzier_client.py index ff013cf3c..409055c1d 100644 --- a/fastdeploy/input/tokenzier_client.py +++ b/fastdeploy/input/tokenzier_client.py @@ -40,6 +40,7 @@ class VideoEncodeRequest(BaseEncodeRequest): start_ts: int end_ts: int frames: int + vit_merge: bool class ImageDecodeRequest(BaseModel): diff --git a/tests/input/test_tokenizer_client.py b/tests/input/test_tokenizer_client.py index 64c50e929..ef180270f 100644 --- a/tests/input/test_tokenizer_client.py +++ b/tests/input/test_tokenizer_client.py @@ -58,6 +58,7 @@ async def test_encode_video_failure(): start_ts=0.0, end_ts=10.0, frames=30, + vit_merge=True, ) with pytest.raises(RuntimeError, match="Encode failed"):