[Scheduler] Support chunk prefill for video input (#5107)

* add video chunk prefill

* add vit_merge=True for test_tokenizer_client.py

---------

Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com>
This commit is contained in:
yangjianfengo1
2025-11-20 16:29:13 +08:00
committed by GitHub
parent 0edda75a56
commit af715db763
3 changed files with 11 additions and 1 deletions

View File

@@ -40,6 +40,7 @@ from fastdeploy.engine.request import (
RequestType,
)
from fastdeploy.engine.resource_manager import ResourceManager
from fastdeploy.input.utils import IDS_TYPE_FLAG
from fastdeploy.inter_communicator import IPCSignal
from fastdeploy.metrics.metrics import main_process_metrics
from fastdeploy.multimodal.hasher import MultimodalHasher
@@ -391,8 +392,15 @@ class ResourceManagerV1(ResourceManager):
end_patch_idx -= 1
end_patch_map = inputs["patch_map"][end_patch_idx]
end_modal_id = end_patch_map["modal_id"]
if end_modal_id > 0:
if end_modal_id > 0 and end_modal_id != IDS_TYPE_FLAG["video"]:
new_end_idx = end_patch_map["end_idx"] # end position of the current modality
if end_modal_id == IDS_TYPE_FLAG["video"] and "can_split_idx_list" in inputs:
can_split_idx_list = inputs["can_split_idx_list"]
for i in range(len(can_split_idx_list)):
if can_split_idx_list[i] >= new_end_idx:
new_end_idx = can_split_idx_list[i]
break
num_new_tokens = new_end_idx - pre_end_idx
request.image_end = end_patch_map["image_num"]