# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import cv2
import os
import numpy as np
import random
from PIL import Image, ImageEnhance
import paddle


def _normalize_to_chw(img, mean, std):
    """Divide an HWC image by 255, move channels first and standardize it.

    Args:
        img: Array with three channels on its last axis.
        mean: Three per-channel values subtracted after the division.
        std: Three per-channel values the result is divided by.

    Returns:
        A float32 array laid out as (channel, height, width).
    """
    chw = np.transpose(img / 255.0, [2, 0, 1])
    # In-place arithmetic on purpose: it keeps the intermediate dtype of
    # `img / 255.0`, exactly as the per-function code this helper replaces.
    chw -= np.array(mean).reshape((3, 1, 1))
    chw /= np.array(std).reshape((3, 1, 1))
    return chw.astype(np.float32)


"""
Preprocess for Yolov5/v6/v7 Series
"""


def generate_scale(im, target_shape):
    """Compute the aspect-preserving resize factor for ``im``.

    The shorter image side is scaled to the smaller target side; if that
    would push the longer side past the larger target side, the longer side
    is scaled to the larger target side instead.

    Args:
        im: Image array; only ``im.shape[:2]`` is read.
        target_shape: Sequence of target side lengths.

    Returns:
        Tuple ``(im_scale_y, im_scale_x)``; both entries are the same factor.
    """
    origin_shape = im.shape[:2]
    im_size_min = np.min(origin_shape)
    im_size_max = np.max(origin_shape)
    target_size_min = np.min(target_shape)
    target_size_max = np.max(target_shape)

    im_scale = float(target_size_min) / float(im_size_min)
    if np.round(im_scale * im_size_max) > target_size_max:
        im_scale = float(target_size_max) / float(im_size_max)
    return im_scale, im_scale


def yolo_image_preprocess(img, target_shape=(640, 640)):
    """Resize with kept aspect ratio, pad to ``target_shape``, convert to CHW.

    The resized image is pasted into the top-left corner of a canvas filled
    with 114. The paste requires the resized image to fit inside
    ``target_shape``, which holds for square targets.

    Args:
        img: Three-channel HWC image array.
        target_shape: ``(height, width)`` of the output.

    Returns:
        A float32 CHW array whose values were divided by 255.
    """
    # Resize image
    im_scale_y, im_scale_x = generate_scale(img, target_shape)
    img = cv2.resize(
        img,
        None,
        None,
        fx=im_scale_x,
        fy=im_scale_y,
        interpolation=cv2.INTER_LINEAR)

    # Pad
    im_h, im_w = img.shape[:2]
    h, w = target_shape
    if h != im_h or w != im_w:
        canvas = np.full((h, w, 3), 114.0, dtype=np.float32)
        canvas[0:im_h, 0:im_w, :] = img.astype(np.float32)
        img = canvas

    img = np.transpose(img / 255, [2, 0, 1])
    return img.astype(np.float32)


"""
Preprocess for PaddleClas model
"""


def cls_resize_short(img, target_size):
    """Resize ``img`` so that its shorter side equals ``target_size``.

    Args:
        img: Image array; height and width are read from ``img.shape[:2]``.
        target_size: Desired length of the shorter side.

    Returns:
        The image resized with bilinear interpolation.
    """
    img_h, img_w = img.shape[:2]
    percent = float(target_size) / min(img_w, img_h)
    w = int(round(img_w * percent))
    h = int(round(img_h * percent))
    return cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)


def crop_image(img, target_size, center):
    """Cut a ``target_size`` x ``target_size`` window out of an HWC image.

    Args:
        img: Three-dimensional image array.
        target_size: Side length of the square crop.
        center: If truthy the window is centered, otherwise its offset is
            drawn with ``np.random.randint``.

    Returns:
        The cropped view of ``img``.
    """
    height, width = img.shape[:2]
    size = target_size
    if center:
        w_start = (width - size) // 2
        h_start = (height - size) // 2
    else:
        w_start = np.random.randint(0, width - size + 1)
        h_start = np.random.randint(0, height - size + 1)
    return img[h_start:h_start + size, w_start:w_start + size, :]


def cls_image_preprocess(img):
    """Resize the short side to 256, center-crop 224 and normalize.

    Args:
        img: Three-channel HWC image array.

    Returns:
        A float32 CHW array.
    """
    # resize
    img = cls_resize_short(img, target_size=256)
    # crop
    img = crop_image(img, target_size=224, center=True)
    # ToCHWImage & Normalize
    return _normalize_to_chw(
        img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])


"""
Preprocess for PPYOLOE
"""


def ppdet_resize_no_keepratio(img, target_shape=(640, 640)):
    """Stretch ``img`` to ``target_shape`` without keeping the aspect ratio.

    Args:
        img: Image array; height and width are read from ``img.shape``.
        target_shape: ``(height, width)`` to resize to.

    Returns:
        Tuple ``(resized_img, scale_factor)`` where ``scale_factor`` is a
        float32 array ``[scale_y, scale_x]``.
    """
    im_shape = img.shape
    resize_h, resize_w = target_shape
    im_scale_y = resize_h / im_shape[0]
    im_scale_x = resize_w / im_shape[1]

    scale_factor = np.asarray([im_scale_y, im_scale_x], dtype=np.float32)
    resized = cv2.resize(
        img,
        None,
        None,
        fx=im_scale_x,
        fy=im_scale_y,
        # Named form of the literal flag value 2 used previously.
        interpolation=cv2.INTER_CUBIC)
    return resized, scale_factor


def ppyoloe_withNMS_image_preprocess(img):
    """Resize to 640x640 and apply mean/std normalization.

    Args:
        img: Three-channel HWC image array.

    Returns:
        Tuple ``(chw_float32_image, scale_factor)``.
    """
    img, scale_factor = ppdet_resize_no_keepratio(img, target_shape=[640, 640])
    img = _normalize_to_chw(
        img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    return img, scale_factor


def ppyoloe_plus_withNMS_image_preprocess(img):
    """Resize to 640x640 and divide by 255 (no mean/std normalization).

    Args:
        img: Three-channel HWC image array.

    Returns:
        Tuple ``(chw_float32_image, scale_factor)``.
    """
    img, scale_factor = ppdet_resize_no_keepratio(img, target_shape=[640, 640])
    img = np.transpose(img / 255, [2, 0, 1])
    return img.astype(np.float32), scale_factor


"""
Preprocess for PP_LiteSeg
"""


def ppseg_cityscapes_ptq_preprocess(img):
    """Normalize an HWC image with mean 0.5 / std 0.5 and convert it to CHW.

    Args:
        img: Three-channel HWC image array.

    Returns:
        A float32 CHW array.
    """
    # ToCHWImage & Normalize
    return _normalize_to_chw(img, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])


def ResizeStepScaling(img,
                      min_scale_factor=0.75,
                      max_scale_factor=1.25,
                      scale_step_size=0.25):
    """Resize ``img`` by a randomly selected scale factor.

    Args:
        img: Image array.
        min_scale_factor: Smallest candidate factor.
        max_scale_factor: Largest candidate factor.
        scale_step_size: Spacing of the candidate factors; ``0`` draws the
            factor uniformly from the continuous range instead.

    Returns:
        The image resized with bilinear interpolation.
    """
    # Adapted from PaddleSeg.
    if min_scale_factor == max_scale_factor:
        scale_factor = min_scale_factor
    elif scale_step_size == 0:
        scale_factor = np.random.uniform(min_scale_factor, max_scale_factor)
    else:
        num_steps = int((max_scale_factor - min_scale_factor) /
                        scale_step_size + 1)
        scale_factors = np.linspace(min_scale_factor, max_scale_factor,
                                    num_steps).tolist()
        # Shuffle-and-take-first is kept (rather than a single random pick)
        # so that seeded runs consume the random stream exactly as before.
        np.random.shuffle(scale_factors)
        scale_factor = scale_factors[0]

    w = int(round(scale_factor * img.shape[1]))
    h = int(round(scale_factor * img.shape[0]))
    return cv2.resize(img, (w, h), interpolation=cv2.INTER_LINEAR)


def RandomPaddingCrop(img,
                      crop_size=(512, 512),
                      im_padding_value=(127.5, 127.5, 127.5),
                      label_padding_value=255):
    """Pad ``img`` up to ``crop_size`` if needed, then take a random crop.

    Args:
        img: Three-dimensional image array.
        crop_size: ``(width, height)`` of the output as a list or tuple.
        im_padding_value: Border value used when padding at the bottom/right.
        label_padding_value: Unused here; kept for interface compatibility.

    Returns:
        The input itself when it already has the requested size, otherwise
        the padded and/or cropped image.

    Raises:
        TypeError: If ``crop_size`` is neither a list nor a tuple.
        ValueError: If ``crop_size`` does not have exactly two elements.
    """
    if not isinstance(crop_size, (list, tuple)):
        raise TypeError(
            "The type of `crop_size` is invalid. It should be list or tuple, but it is {}"
            .format(type(crop_size)))
    if len(crop_size) != 2:
        raise ValueError(
            'Type of `crop_size` is list or tuple. It should include 2 elements, but it is {}'
            .format(crop_size))

    crop_width = crop_size[0]
    crop_height = crop_size[1]

    img_height = img.shape[0]
    img_width = img.shape[1]
    if img_height == crop_height and img_width == crop_width:
        return img

    pad_height = max(crop_height - img_height, 0)
    pad_width = max(crop_width - img_width, 0)
    if pad_height > 0 or pad_width > 0:
        img = cv2.copyMakeBorder(
            img,
            0,
            pad_height,
            0,
            pad_width,
            cv2.BORDER_CONSTANT,
            value=im_padding_value)
        img_height = img.shape[0]
        img_width = img.shape[1]

    if crop_height > 0 and crop_width > 0:
        h_off = np.random.randint(img_height - crop_height + 1)
        w_off = np.random.randint(img_width - crop_width + 1)
        img = img[h_off:h_off + crop_height, w_off:w_off + crop_width, :]
    return img


def RandomHorizontalFlip(img, prob=0.5):
    """Mirror ``img`` along its second axis with probability ``prob``.

    Args:
        img: Two- or three-dimensional array; other ranks pass through.
        prob: Probability of flipping.

    Returns:
        The flipped view, or ``img`` unchanged.
    """
    if random.random() < prob:
        if len(img.shape) == 3:
            img = img[:, ::-1, :]
        elif len(img.shape) == 2:
            img = img[:, ::-1]
    return img


def brightness(im, brightness_lower, brightness_upper):
    """Apply ``ImageEnhance.Brightness`` with a factor drawn uniformly from the range."""
    brightness_delta = np.random.uniform(brightness_lower, brightness_upper)
    return ImageEnhance.Brightness(im).enhance(brightness_delta)


def contrast(im, contrast_lower, contrast_upper):
    """Apply ``ImageEnhance.Contrast`` with a factor drawn uniformly from the range."""
    contrast_delta = np.random.uniform(contrast_lower, contrast_upper)
    return ImageEnhance.Contrast(im).enhance(contrast_delta)


def saturation(im, saturation_lower, saturation_upper):
    """Apply ``ImageEnhance.Color`` with a factor drawn uniformly from the range."""
    saturation_delta = np.random.uniform(saturation_lower, saturation_upper)
    return ImageEnhance.Color(im).enhance(saturation_delta)


def hue(im, hue_lower, hue_upper):
    """Shift the hue channel of a PIL image by a uniformly drawn offset.

    Args:
        im: PIL image.
        hue_lower: Lower bound of the offset.
        hue_upper: Upper bound of the offset.

    Returns:
        The shifted image converted back to RGB.
    """
    hue_delta = np.random.uniform(hue_lower, hue_upper)
    hsv = np.array(im.convert('HSV'))
    # The channel is uint8 and hue is cyclic, so wrap explicitly modulo 256.
    # Writing the float sum straight into the uint8 array relied on an
    # out-of-range float->uint8 cast, whose result is platform-dependent.
    shifted = np.floor(hsv[:, :, 0].astype(np.float64) + hue_delta)
    hsv[:, :, 0] = (shifted.astype(np.int64) % 256).astype(np.uint8)
    return Image.fromarray(hsv, mode='HSV').convert('RGB')


def sharpness(im, sharpness_lower, sharpness_upper):
    """Apply ``ImageEnhance.Sharpness`` with a factor drawn uniformly from the range."""
    sharpness_delta = np.random.uniform(sharpness_lower, sharpness_upper)
    return ImageEnhance.Sharpness(im).enhance(sharpness_delta)


def RandomDistort(img,
                  brightness_range=0.5,
                  brightness_prob=0.5,
                  contrast_range=0.5,
                  contrast_prob=0.5,
                  saturation_range=0.5,
                  saturation_prob=0.5,
                  hue_range=18,
                  hue_prob=0.5,
                  sharpness_range=0.5,
                  sharpness_prob=0):
    """Apply the photometric distortions in random order, each with its own probability.

    Args:
        img: Image array; it is cast to uint8 before the distortions run.
        brightness_range: Factor is drawn from ``[1 - range, 1 + range]``.
        brightness_prob: Probability of applying the brightness change.
        contrast_range: Factor is drawn from ``[1 - range, 1 + range]``.
        contrast_prob: Probability of applying the contrast change.
        saturation_range: Factor is drawn from ``[1 - range, 1 + range]``.
        saturation_prob: Probability of applying the saturation change.
        hue_range: Offset is drawn from ``[-range, range]``.
        hue_prob: Probability of applying the hue shift.
        sharpness_range: Factor is drawn from ``[1 - range, 1 + range]``.
        sharpness_prob: Probability of applying the sharpness change.

    Returns:
        The distorted image as a float32 array.
    """
    bounds = {
        'brightness': (1 - brightness_range, 1 + brightness_range),
        'contrast': (1 - contrast_range, 1 + contrast_range),
        'saturation': (1 - saturation_range, 1 + saturation_range),
        'hue': (-hue_range, hue_range),
        'sharpness': (1 - sharpness_range, 1 + sharpness_range),
    }
    prob_dict = {
        'brightness': brightness_prob,
        'contrast': contrast_prob,
        'saturation': saturation_prob,
        'hue': hue_prob,
        'sharpness': sharpness_prob,
    }

    ops = [brightness, contrast, saturation, hue, sharpness]
    random.shuffle(ops)

    img = Image.fromarray(img.astype('uint8'))
    for op in ops:
        lower, upper = bounds[op.__name__]
        # One draw per op, whether or not it fires, so the random stream is
        # consumed in the same order as before.
        if np.random.uniform(0, 1) < prob_dict[op.__name__]:
            img = op(img, lower, upper)
    return np.asarray(img).astype('float32')


def ppseg_cityscapes_qat_preprocess(img):
    """Run the random augmentation pipeline and normalize with mean/std 0.5.

    The steps are: random step scaling, random padding crop to 1024x512
    (width x height), random horizontal flip, random photometric distortion,
    then normalization and conversion to CHW.

    Args:
        img: Three-channel HWC image array.

    Returns:
        A float32 CHW array.
    """
    min_scale_factor = 0.5
    max_scale_factor = 2.0
    scale_step_size = 0.25
    crop_size = (1024, 512)
    brightness_range = 0.5
    contrast_range = 0.5
    saturation_range = 0.5

    img = ResizeStepScaling(
        img,
        min_scale_factor=min_scale_factor,
        max_scale_factor=max_scale_factor,
        scale_step_size=scale_step_size)
    img = RandomPaddingCrop(img, crop_size=crop_size)
    img = RandomHorizontalFlip(img)
    img = RandomDistort(
        img,
        brightness_range=brightness_range,
        contrast_range=contrast_range,
        saturation_range=saturation_range)

    return _normalize_to_chw(img, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])