[Scheduler] Support chunk prefill for video input (#5107)

* add video chunk prefill

* add vit_merge=True for test_tokenizer_client.py

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
Author: yangjianfengo1
Date: 2025-11-20 16:29:13 +08:00 (committed by GitHub)
Parent: 0edda75a56
Commit: af715db763
3 changed files with 11 additions and 1 deletions


@@ -40,6 +40,7 @@ from fastdeploy.engine.request import (
     RequestType,
 )
 from fastdeploy.engine.resource_manager import ResourceManager
+from fastdeploy.input.utils import IDS_TYPE_FLAG
 from fastdeploy.inter_communicator import IPCSignal
 from fastdeploy.metrics.metrics import main_process_metrics
 from fastdeploy.multimodal.hasher import MultimodalHasher
@@ -391,8 +392,15 @@ class ResourceManagerV1(ResourceManager):
             end_patch_idx -= 1
         end_patch_map = inputs["patch_map"][end_patch_idx]
         end_modal_id = end_patch_map["modal_id"]
-        if end_modal_id > 0:
+        if end_modal_id > 0 and end_modal_id != IDS_TYPE_FLAG["video"]:
             new_end_idx = end_patch_map["end_idx"]  # end position of the current modality
+        if end_modal_id == IDS_TYPE_FLAG["video"] and "can_split_idx_list" in inputs:
+            can_split_idx_list = inputs["can_split_idx_list"]
+            for i in range(len(can_split_idx_list)):
+                if can_split_idx_list[i] >= new_end_idx:
+                    new_end_idx = can_split_idx_list[i]
+                    break
         num_new_tokens = new_end_idx - pre_end_idx
         request.image_end = end_patch_map["image_num"]
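
The added video branch snaps the tentative chunk end forward to the first entry of can_split_idx_list at or beyond it, so chunked prefill never splits a video's patch sequence at an arbitrary token, while other modalities keep the previous end_idx behaviour. Below is a minimal sketch of that snapping step as a standalone function; the name snap_to_video_boundary and the standalone form are illustrative, not FastDeploy APIs.

from typing import List

def snap_to_video_boundary(new_end_idx: int, can_split_idx_list: List[int]) -> int:
    # Mirror the loop in the diff: take the first allowed split index that is
    # at or beyond the tentative chunk end; otherwise leave the end unchanged.
    for idx in can_split_idx_list:
        if idx >= new_end_idx:
            return idx
    return new_end_idx

if __name__ == "__main__":
    # A chunk that would end at token 130, inside a video that may only be
    # split at 96, 128, or 160, is extended to end at 160 instead.
    print(snap_to_video_boundary(130, [96, 128, 160]))  # -> 160

Advancing forward means a chunk can grow slightly past its nominal token budget, but the split always lands on an index where the video input can be safely divided.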


@@ -40,6 +40,7 @@ class VideoEncodeRequest(BaseEncodeRequest):
     start_ts: int
     end_ts: int
     frames: int
+    vit_merge: bool
 class ImageDecodeRequest(BaseModel):
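
VideoEncodeRequest now carries a vit_merge flag next to the timing and frame fields. A hedged sketch of a comparable request model, assuming the base class behaves like a pydantic BaseModel; the class name, the url field, and the default value are assumptions for illustration only.

from pydantic import BaseModel

class VideoEncodeRequestSketch(BaseModel):
    # Fields start_ts, end_ts, frames, vit_merge come from the hunk above;
    # url and the default value are illustrative assumptions.
    url: str
    start_ts: int
    end_ts: int
    frames: int
    vit_merge: bool = True  # new in this commit; presumably controls ViT patch merging for video

req = VideoEncodeRequestSketch(url="file:///tmp/clip.mp4", start_ts=0, end_ts=10, frames=30, vit_merge=True)
print(req)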


@@ -58,6 +58,7 @@ async def test_encode_video_failure():
         start_ts=0.0,
         end_ts=10.0,
         frames=30,
+        vit_merge=True,
     )
     with pytest.raises(RuntimeError, match="Encode failed"):
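
For context, a self-contained sketch of the failure pattern this test exercises, using a fake client that always raises; FakeEncodeClient and its encode_video method are illustrative stand-ins, not FastDeploy's client API.

import pytest

class FakeEncodeClient:
    def encode_video(self, **kwargs):
        # Simulate the backend error the real test expects to propagate.
        raise RuntimeError("Encode failed")

def test_encode_video_failure_sketch():
    client = FakeEncodeClient()
    with pytest.raises(RuntimeError, match="Encode failed"):
        client.encode_video(
            start_ts=0.0,
            end_ts=10.0,
            frames=30,
            vit_merge=True,  # the new flag exercised by this commit
        )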