polish code with new pre-commit rule (#2923)

Zero Rains
2025-07-19 23:19:27 +08:00
committed by GitHub
parent b8676d71a8
commit 25698d56d1
424 changed files with 14307 additions and 13518 deletions

View File

@@ -17,4 +17,4 @@
 from .get_image_preprocessor import get_image_preprocessor
 from .image_preprocessor_adaptive import AdaptiveImageProcessor
-__all__ = ['get_image_preprocessor', 'AdaptiveImageProcessor']
+__all__ = ["get_image_preprocessor", "AdaptiveImageProcessor"]

View File

@@ -16,9 +16,10 @@
 """get image preprocessor"""
-from .image_preprocessor_adaptive import AdaptiveImageProcessor
 from fastdeploy.utils import data_processor_logger

+from .image_preprocessor_adaptive import AdaptiveImageProcessor
+

 def get_image_preprocessor(args):
     """

View File

@@ -42,9 +42,7 @@ from paddleformers.transformers.image_utils import (
     to_numpy_array,
     valid_images,
 )
-from paddleformers.transformers.tokenizer_utils_base import (
-    TensorType,
-)
+from paddleformers.transformers.tokenizer_utils_base import TensorType
 from PIL import Image

 from fastdeploy.utils import data_processor_logger
@@ -161,7 +159,12 @@ class AdaptiveImageProcessor(BaseImageProcessor):
             The merge size of the vision encoder to llm encoder.
     """

-    model_input_names = ["pixel_values", "image_grid_thw", "pixel_values_videos", "video_grid_thw"]
+    model_input_names = [
+        "pixel_values",
+        "image_grid_thw",
+        "pixel_values_videos",
+        "video_grid_thw",
+    ]

     def __init__(
         self,
@@ -221,7 +224,10 @@ class AdaptiveImageProcessor(BaseImageProcessor):
             min_pixels=actual_min_pixels,
             max_pixels=actual_max_pixels,
         )
-        return (resized_height, resized_width), (resized_height // self.patch_size, resized_width // self.patch_size)
+        return (resized_height, resized_width), (
+            resized_height // self.patch_size,
+            resized_width // self.patch_size,
+        )

     def _preprocess(
         self,
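The second tuple returned above is the patch grid implied by the resized image. A quick arithmetic sketch, assuming a patch_size of 14 (the real value is set in the processor configuration):

    patch_size = 14  # assumption for illustration; configured on the processor
    resized_height, resized_width = 448, 672
    grid = (resized_height // patch_size, resized_width // patch_size)
    print(grid)  # (32, 48): the image maps to a 32 x 48 grid of vision patches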
@@ -330,7 +336,12 @@ class AdaptiveImageProcessor(BaseImageProcessor):
             image = rescale(image, scale=rescale_factor, data_format=input_data_format)

         if do_normalize:
-            image = normalize(image=image, mean=image_mean, std=image_std, data_format=input_data_format)
+            image = normalize(
+                image=image,
+                mean=image_mean,
+                std=image_std,
+                data_format=input_data_format,
+            )

         image = to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)  # [C, H, W]
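The two calls being rewrapped above perform the usual scale-then-standardize transform. A minimal NumPy sketch, assuming a 1/255 rescale factor and ImageNet mean/std (the actual rescale_factor, image_mean, and image_std come from the processor configuration):

    import numpy as np

    image = np.random.randint(0, 256, (448, 672, 3)).astype(np.float32)
    image = image * (1.0 / 255.0)  # rescale into [0, 1]
    mean = np.array([0.485, 0.456, 0.406])  # assumed ImageNet statistics
    std = np.array([0.229, 0.224, 0.225])
    image = (image - mean) / std  # normalize per channel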
@@ -341,7 +352,10 @@ class AdaptiveImageProcessor(BaseImageProcessor):
         channel = patches.shape[1]  # [time, C, H, W]
         grid_t = patches.shape[0]
-        grid_h, grid_w = resized_height // self.patch_size, resized_width // self.patch_size
+        grid_h, grid_w = (
+            resized_height // self.patch_size,
+            resized_width // self.patch_size,
+        )
         patches = patches.reshape(
             [
                 grid_t,
@@ -358,7 +372,10 @@ class AdaptiveImageProcessor(BaseImageProcessor):
         patches = patches.transpose([0, 2, 5, 3, 6, 1, 4, 7])
         flatten_patches = patches.reshape(
-            [grid_t * grid_h * grid_w, channel * self.patch_size * self.patch_size]
+            [
+                grid_t * grid_h * grid_w,
+                channel * self.patch_size * self.patch_size,
+            ]
         )  # [grid_t * grid_h * grid_w, C * psz * psz]

         return flatten_patches, (grid_t, grid_h, grid_w)
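This hunk rewraps the core patch-flattening step: each image becomes grid_t * grid_h * grid_w rows of channel * patch_size * patch_size features, with patches grouped by spatial merge window before flattening. A NumPy sketch consistent with the transpose order shown; the full reshape list is truncated in this excerpt, so the 8-axis layout and the merge_size/patch_size values below are assumptions:

    import numpy as np

    grid_t, channel = 1, 3
    patch_size, merge_size = 14, 2  # assumed values
    grid_h, grid_w = 32, 48  # resized_height // patch_size, resized_width // patch_size

    patches = np.random.rand(grid_t, channel, grid_h * patch_size, grid_w * patch_size)

    # Split H and W into (merge-window blocks, window position, patch pixel) axes.
    patches = patches.reshape(
        [
            grid_t,
            channel,
            grid_h // merge_size,
            merge_size,
            patch_size,
            grid_w // merge_size,
            merge_size,
            patch_size,
        ]
    )
    # Bring merge-window neighbours together, then flatten each patch's pixels.
    patches = patches.transpose([0, 2, 5, 3, 6, 1, 4, 7])
    flatten_patches = patches.reshape(
        [grid_t * grid_h * grid_w, channel * patch_size * patch_size]
    )
    print(flatten_patches.shape)  # (1536, 588) = (1 * 32 * 48, 3 * 14 * 14)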
@@ -471,7 +488,10 @@ class AdaptiveImageProcessor(BaseImageProcessor):
                 vision_grid_thws.append(image_grid_thw)
             pixel_values = np.array(pixel_values)
             vision_grid_thws = np.array(vision_grid_thws)
-            data = {"pixel_values": pixel_values, "image_grid_thw": vision_grid_thws}
+            data = {
+                "pixel_values": pixel_values,
+                "image_grid_thw": vision_grid_thws,
+            }

         if videos is not None:
             pixel_values, vision_grid_thws = [], []
@@ -495,7 +515,10 @@ class AdaptiveImageProcessor(BaseImageProcessor):
             pixel_values = np.array(pixel_values)
             vision_grid_thws = np.array(vision_grid_thws)
-            data = {"pixel_values_videos": pixel_values, "video_grid_thw": vision_grid_thws}
+            data = {
+                "pixel_values_videos": pixel_values,
+                "video_grid_thw": vision_grid_thws,
+            }

         return BatchFeature(data=data, tensor_type=return_tensors)
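The arrays are finally wrapped in a BatchFeature keyed by the model_input_names declared earlier. A hypothetical call sketch (the constructor arguments and the preprocess entry point are assumptions based on the usual BaseImageProcessor convention, not taken from this diff):

    processor = AdaptiveImageProcessor()
    features = processor.preprocess(images=[image], return_tensors="np")  # hypothetical call
    print(features["pixel_values"].shape)  # [grid_t * grid_h * grid_w, C * psz * psz]
    print(features["image_grid_thw"])  # one (t, h, w) grid triple per image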
@@ -516,7 +539,11 @@ def floor_by_factor(number: int, factor: int) -> int:

 def smart_resize(
-    height: int, width: int, factor: int = IMAGE_FACTOR, min_pixels: int = MIN_PIXELS, max_pixels: int = MAX_PIXELS
+    height: int,
+    width: int,
+    factor: int = IMAGE_FACTOR,
+    min_pixels: int = MIN_PIXELS,
+    max_pixels: int = MAX_PIXELS,
 ):
     """
     Rescales the image so that the following conditions are met: