[Scheduler] Support chunk prefill for video input (#5107)

* add video chunk prefill

* add vit_merge=True for test_tokenizer_client.py

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
Author: yangjianfengo1
Date: 2025-11-20 16:29:13 +08:00 (committed by GitHub)
Parent: 0edda75a56
Commit: af715db763
3 changed files with 11 additions and 1 deletions


@@ -40,6 +40,7 @@ from fastdeploy.engine.request import (
     RequestType,
 )
 from fastdeploy.engine.resource_manager import ResourceManager
+from fastdeploy.input.utils import IDS_TYPE_FLAG
 from fastdeploy.inter_communicator import IPCSignal
 from fastdeploy.metrics.metrics import main_process_metrics
 from fastdeploy.multimodal.hasher import MultimodalHasher
@@ -391,8 +392,15 @@ class ResourceManagerV1(ResourceManager):
             end_patch_idx -= 1
         end_patch_map = inputs["patch_map"][end_patch_idx]
         end_modal_id = end_patch_map["modal_id"]
-        if end_modal_id > 0:
+        if end_modal_id > 0 and end_modal_id != IDS_TYPE_FLAG["video"]:
             new_end_idx = end_patch_map["end_idx"]  # end position of the current modality
+        if end_modal_id == IDS_TYPE_FLAG["video"] and "can_split_idx_list" in inputs:
+            can_split_idx_list = inputs["can_split_idx_list"]
+            for i in range(len(can_split_idx_list)):
+                if can_split_idx_list[i] >= new_end_idx:
+                    new_end_idx = can_split_idx_list[i]
+                    break
         num_new_tokens = new_end_idx - pre_end_idx
         request.image_end = end_patch_map["image_num"]
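
The added video branch snaps the tentative chunk end forward to the first entry of can_split_idx_list at or beyond it, so chunked prefill never splits a video's patch sequence at an arbitrary token, while other modalities keep the previous end_idx behaviour. Below is a minimal sketch of that snapping step as a standalone function; the name snap_to_video_boundary and the standalone form are illustrative, not FastDeploy APIs.

from typing import List

def snap_to_video_boundary(new_end_idx: int, can_split_idx_list: List[int]) -> int:
    # Mirror the loop in the diff: take the first allowed split index that is
    # at or beyond the tentative chunk end; otherwise leave the end unchanged.
    for idx in can_split_idx_list:
        if idx >= new_end_idx:
            return idx
    return new_end_idx

if __name__ == "__main__":
    # A chunk that would end at token 130, inside a video that may only be
    # split at 96, 128, or 160, is extended to end at 160 instead.
    print(snap_to_video_boundary(130, [96, 128, 160]))  # -> 160

Advancing forward means a chunk can grow slightly past its nominal token budget, but the split always lands on an index where the video input can be safely divided.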


@@ -40,6 +40,7 @@ class VideoEncodeRequest(BaseEncodeRequest):
     start_ts: int
     end_ts: int
     frames: int
+    vit_merge: bool
 class ImageDecodeRequest(BaseModel):
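
VideoEncodeRequest now carries a vit_merge flag next to the timing and frame fields. A hedged sketch of a comparable request model, assuming the base class behaves like a pydantic BaseModel; the class name, the url field, and the default value are assumptions for illustration only.

from pydantic import BaseModel

class VideoEncodeRequestSketch(BaseModel):
    # Fields start_ts, end_ts, frames, vit_merge come from the hunk above;
    # url and the default value are illustrative assumptions.
    url: str
    start_ts: int
    end_ts: int
    frames: int
    vit_merge: bool = True  # new in this commit; presumably controls ViT patch merging for video

req = VideoEncodeRequestSketch(url="file:///tmp/clip.mp4", start_ts=0, end_ts=10, frames=30, vit_merge=True)
print(req)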


@@ -58,6 +58,7 @@ async def test_encode_video_failure():
         start_ts=0.0,
         end_ts=10.0,
         frames=30,
+        vit_merge=True,
     )
     with pytest.raises(RuntimeError, match="Encode failed"):
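
For context, a self-contained sketch of the failure pattern this test exercises, using a fake client that always raises; FakeEncodeClient and its encode_video method are illustrative stand-ins, not FastDeploy's client API.

import pytest

class FakeEncodeClient:
    def encode_video(self, **kwargs):
        # Simulate the backend error the real test expects to propagate.
        raise RuntimeError("Encode failed")

def test_encode_video_failure_sketch():
    client = FakeEncodeClient()
    with pytest.raises(RuntimeError, match="Encode failed"):
        client.encode_video(
            start_ts=0.0,
            end_ts=10.0,
            frames=30,
            vit_merge=True,  # the new flag exercised by this commit
        )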