[Iluvatar] Support V1_KVCACHE_SCHEDULER and paddleocr-vl rope mode (#5555)

This commit is contained in:
yzwu
2025-12-18 18:14:25 +08:00
committed by GitHub
parent 48f3e9797e
commit ac013803f3
24 changed files with 1212 additions and 1090 deletions

View File

@@ -56,11 +56,11 @@ from fastdeploy.platforms import current_platform
if current_platform.is_iluvatar():
from fastdeploy.model_executor.ops.iluvatar import (
recover_decode_task,
set_data_ipc,
set_value_by_flags_and_idx,
)
recover_decode_task = None
share_external_data = None
elif current_platform.is_dcu():
from fastdeploy.model_executor.ops.gpu import set_value_by_flags_and_idx
@@ -467,7 +467,7 @@ class GPUModelRunner(ModelRunnerBase):
multi_vision_inputs["encoder_cache_info"].append((mm_hash, feature_positions[i], False))
if envs.FD_ENABLE_MAX_PREFILL:
multi_vision_inputs["images_lst"].append(
inputs["images"][image_start_idx : image_start_idx + image_offset].cuda()
inputs["images"][image_start_idx : image_start_idx + image_offset].to(self.device)
)
multi_vision_inputs["grid_thw_lst"].append(paddle.to_tensor(grid_thw_list[i]))
multi_vision_inputs["cu_seqlens"].append(vit_seqlen_list[i])
@@ -486,7 +486,7 @@ class GPUModelRunner(ModelRunnerBase):
else:
if envs.FD_ENABLE_MAX_PREFILL:
multi_vision_inputs["images_lst"].append(
inputs["images"][request.image_start : request.image_end].cuda()
inputs["images"][request.image_start : request.image_end].to(self.device)
)
multi_vision_inputs["grid_thw_lst"].extend(
paddle.to_tensor(inputs["grid_thw"][request.num_image_start : request.num_image_end])