[LLM] First commit the llm deployment code

2025-10-05 16:48:03 +08:00 · 2025-06-09 19:20:15 +08:00
parent 980c0a1d2c
commit 684703fd72
11814 changed files with 127294 additions and 1293102 deletions
--- a/fastdeploy/input/mm_processor/process_video.py
+++ b/fastdeploy/input/mm_processor/process_video.py
@@ -0,0 +1,201 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import io
+import os
+import random
+
+import numpy as np
+from PIL import Image
+
+from .utils.io_utils import EXTRACTED_FRAME_DIR, get_downloadable, get_filename
+from .utils.video_utils import VideoReaderWrapper
+from fastdeploy.utils import data_processor_logger
+
+
+def read_video_decord(video_path, save_to_disk):
+    """get reader and meta by decord"""
+    data_in_mem = False
+    # video_path = get_downloadable(video_path, save_to_disk=save_to_disk)
+    if isinstance(video_path, VideoReaderWrapper):
+        data_in_mem = True
+        video_reader = video_path
+    else:
+        if isinstance(video_path, bytes):
+            video_path = io.BytesIO(video_path)
+        video_reader = VideoReaderWrapper(video_path, num_threads=1)
+    vlen = len(video_reader)
+    fps = video_reader.get_avg_fps()
+    duration = vlen / float(fps)
+
+    video_meta = {"fps": fps, "duration": duration, "num_of_frame": vlen}
+
+    return video_reader, video_meta, video_path
+
+
+def get_frame_indices(
+    vlen,
+    target_frames=-1,
+    target_fps=-1,
+    frames_sample="middle",
+    fix_start=None,
+    input_fps=-1,
+):
+    """
+    取出对应的frame index
+    """
+    assert frames_sample in ["rand", "middle", "leading"]
+    if target_frames > 0:
+        assert target_fps <= 0, "target_fps must be negative if target_frames is given."
+        if target_frames > vlen:
+            acc_samples = vlen
+            data_processor_logger.info(
+                f"target_frames={target_frames} is larger than video length {vlen}, "
+                f"will sample {acc_samples} frames."
+            )
+        else:
+            acc_samples = target_frames
+            data_processor_logger.debug(f"sampling at target_frames={target_frames}, frames_sample={frames_sample}")
+
+        # split the video into `acc_samples` intervals, and sample from each interval.
+        intervals = np.linspace(start=0, stop=vlen, num=acc_samples + 1).astype(int)
+        ranges = []
+        for idx, interv in enumerate(intervals[:-1]):
+            ranges.append((interv, intervals[idx + 1] - 1))
+        if frames_sample == "rand":
+            try:
+                frame_indices = [random.choice(range(x[0], x[1])) for x in ranges]
+            except Exception as e:
+                frame_indices = np.random.permutation(vlen)[:acc_samples]
+                frame_indices.sort()
+                frame_indices = list(frame_indices)
+        elif fix_start is not None:
+            frame_indices = [x[0] + fix_start for x in ranges]
+        elif frames_sample == "leading":
+            frame_indices = [x[0] for x in ranges]
+        elif frames_sample == "middle":
+            frame_indices = [(x[0] + x[1]) // 2 for x in ranges]
+        else:
+            raise NotImplementedError
+
+    elif target_fps > 0:
+        assert target_frames <= 0, "target_frames must be negative if target_fps is given."
+        assert input_fps > 0, "input_fps must be provided if target_fps is given."
+        data_processor_logger.info(f"sampling at fps={target_fps}, frames_sample={frames_sample}")
+        duration = float(vlen) / input_fps
+        delta = 1 / target_fps  # gap between frames, this is also the clip length each frame represents
+        if frames_sample == "middle":
+            frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta)
+        elif frames_sample == "leading":
+            frame_seconds = np.arange(0, duration, delta)
+        if frames_sample == "rand":
+            frame_seconds = np.arange(0 + delta / 2, duration + delta / 2, delta)
+            rand_offset = np.random.rand(*(frame_seconds.shape)) - 0.5
+            frame_seconds += rand_offset * delta
+        frame_indices = np.around(frame_seconds * input_fps).astype(int)
+        frame_indices = [e for e in frame_indices if e < vlen]
+
+    else:
+        raise ValueError("Must provide either positive target_fps or positive target_frames.")
+
+    return frame_indices
+
+
+def read_frames_decord(
+    video_path,
+    video_reader,
+    video_meta,
+    target_frames=-1,
+    target_fps=-1,
+    frames_sample="middle",
+    fix_start=None,
+    save_to_disk=False,
+    cache_dir=EXTRACTED_FRAME_DIR,
+    frame_indices=None,
+    tol=10,
+):
+    """get frames by decord"""
+
+    if frame_indices is None:
+        frame_indices = get_frame_indices(
+            video_meta["num_of_frame"],
+            target_frames=target_frames,
+            target_fps=target_fps,
+            frames_sample=frames_sample,
+            fix_start=fix_start,
+            input_fps=video_meta["fps"],
+        )
+
+    frames = []
+    for frame_indice_index in range(0, len(frame_indices)):
+        frame_indice = frame_indices[frame_indice_index]
+        try:
+            frames.append(video_reader[frame_indice].asnumpy())  # (T, H, W, C)
+        except Exception as e:
+            data_processor_logger.debug(f"encounter error when get frame: {frame_indice}, error: {e}")
+            previous_counter = 1
+            later_counter = 1
+            previous_after_flag = True
+            if frame_indice == 0 or frame_indice == len(video_reader) - 1:
+                cur_tol = tol * 2
+            else:
+                cur_tol = tol
+            while previous_counter < cur_tol or later_counter < cur_tol:
+                if previous_after_flag:
+                    if frame_indice - previous_counter < 0:
+                        previous_counter += 1
+                        previous_after_flag = not previous_after_flag
+                        continue
+                    try:
+                        frames.append(video_reader[frame_indice - previous_counter].asnumpy())
+                        data_processor_logger.info(f"replace {frame_indice}-th frame with {frame_indice-previous_counter}-th frame")
+                        frame_indices[frame_indice_index] = frame_indice - previous_counter
+                        break
+                    except Exception as e:
+                        previous_counter += 1
+                else:
+                    if frame_indice + later_counter >= len(video_reader):
+                        later_counter += 1
+                        previous_after_flag = not previous_after_flag
+                        continue
+                    try:
+                        frames.append(video_reader[frame_indice + later_counter].asnumpy())
+                        data_processor_logger.info(f"replace {frame_indice}-th frame with {frame_indice+later_counter}-th frame")
+                        frame_indices[frame_indice_index] = frame_indice + later_counter
+                        break
+                    except Exception as e:
+                        later_counter += 1
+                previous_after_flag = not previous_after_flag
+
+    frames = np.stack(frames, axis=0)
+    assert len(frames) == len(frame_indices), f"len(frames): {len(frames)} != len(frame_indices): {len(frame_indices)}"
+
+    ret = []
+
+    url_sha1 = get_filename()
+    for idx, frame in enumerate(frames):
+        tmp = Image.fromarray(frame, "RGB")
+        if save_to_disk:
+            save_path = os.path.join(cache_dir, f"{url_sha1}", f"{idx}.png")
+            if not os.path.exists(os.path.dirname(save_path)):
+                os.makedirs(os.path.dirname(save_path))
+            tmp.save(save_path)
+            tmp = save_path
+        ret.append(tmp)
+
+    time_stamps = [frame_idx * video_meta["duration"] / video_meta["num_of_frame"] for frame_idx in frame_indices]
+
+    return ret, frame_indices, time_stamps