From fc226af88e0240e7b0c6bc197702d686688c3ca4 Mon Sep 17 00:00:00 2001 From: Amy Roberts <22614925+amyeroberts@users.noreply.github.com> Date: Thu, 25 Apr 2024 15:28:37 +0000 Subject: [PATCH] Move out input validation into base image processor --- src/transformers/image_processing_utils.py | 89 ++++++++++++++++++- src/transformers/image_utils.py | 49 ---------- .../models/beit/image_processing_beit.py | 35 -------- .../models/bit/image_processing_bit.py | 31 ------- .../models/blip/image_processing_blip.py | 28 ------ .../image_processing_bridgetower.py | 65 +++++++------- .../image_processing_chinese_clip.py | 29 ------ .../models/clip/image_processing_clip.py | 30 ------- .../image_processing_conditional_detr.py | 47 ++++------ .../convnext/image_processing_convnext.py | 29 ------ .../image_processing_deformable_detr.py | 46 ++++------ .../models/deit/image_processing_deit.py | 29 ------ .../models/deta/image_processing_deta.py | 38 ++++---- .../models/detr/image_processing_detr.py | 46 ++++------ .../models/donut/image_processing_donut.py | 49 ++++------ .../models/dpt/image_processing_dpt.py | 29 ------ .../image_processing_efficientformer.py | 29 ------ .../image_processing_efficientnet.py | 29 ------ .../models/flava/image_processing_flava.py | 32 ------- .../models/fuyu/image_processing_fuyu.py | 57 +++++++----- .../models/glpn/image_processing_glpn.py | 37 +++----- .../image_processing_grounding_dino.py | 46 ++++------ .../idefics2/image_processing_idefics2.py | 37 +++----- .../imagegpt/image_processing_imagegpt.py | 33 ++----- .../layoutlmv2/image_processing_layoutlmv2.py | 16 ---- .../layoutlmv3/image_processing_layoutlmv3.py | 28 ------ .../models/levit/image_processing_levit.py | 29 ------ .../llava_next/image_processing_llava_next.py | 28 ------ .../image_processing_mask2former.py | 34 ------- .../maskformer/image_processing_maskformer.py | 33 ------- .../image_processing_mobilenet_v1.py | 30 ------- .../image_processing_mobilenet_v2.py | 29 ------ .../mobilevit/image_processing_mobilevit.py | 33 ------- .../models/nougat/image_processing_nougat.py | 59 ++++++------ .../oneformer/image_processing_oneformer.py | 34 ------- .../models/owlv2/image_processing_owlv2.py | 46 ++++------ .../models/owlvit/image_processing_owlvit.py | 30 ------- .../perceiver/image_processing_perceiver.py | 30 ------- .../poolformer/image_processing_poolformer.py | 30 ------- .../models/pvt/image_processing_pvt.py | 28 ------ .../models/sam/image_processing_sam.py | 65 ++++++-------- .../segformer/image_processing_segformer.py | 27 ------ .../models/seggpt/image_processing_seggpt.py | 23 ----- .../models/siglip/image_processing_siglip.py | 27 ------ .../superpoint/image_processing_superpoint.py | 7 -- .../swin2sr/image_processing_swin2sr.py | 34 ++----- .../models/tvlt/image_processing_tvlt.py | 32 ------- .../models/tvp/image_processing_tvp.py | 71 +++++++++------ .../videomae/image_processing_videomae.py | 17 ---- .../models/vilt/image_processing_vilt.py | 49 ++++------ .../models/vit/image_processing_vit.py | 28 ------ .../vit_hybrid/image_processing_vit_hybrid.py | 30 ------- .../vitmatte/image_processing_vitmatte.py | 26 ------ .../models/vivit/image_processing_vivit.py | 73 ++++++++------- .../models/yolos/image_processing_yolos.py | 45 ++++------ 55 files changed, 481 insertions(+), 1559 deletions(-) diff --git a/src/transformers/image_processing_utils.py b/src/transformers/image_processing_utils.py index 70f1a339de706a..287f10a630a77c 100644 --- a/src/transformers/image_processing_utils.py +++ 
b/src/transformers/image_processing_utils.py @@ -26,7 +26,7 @@ from .dynamic_module_utils import custom_object_save from .feature_extraction_utils import BatchFeature as BaseBatchFeature from .image_transforms import center_crop, normalize, rescale -from .image_utils import ChannelDimension +from .image_utils import ChannelDimension, PILImageResampling, is_scaled_image, to_numpy_array, valid_images from .utils import ( IMAGE_PROCESSOR_NAME, PushToHubMixin, @@ -47,6 +47,55 @@ logger = logging.get_logger(__name__) +def validate_kwargs(valid_processor_keys: List[str], captured_kwargs: List[str]): + unused_keys = set(captured_kwargs).difference(set(valid_processor_keys)) + if unused_keys: + unused_key_str = ", ".join(unused_keys) + # TODO raise a warning here instead of simply logging? + logger.warning(f"Unused or unrecognized kwargs: {unused_key_str}.") + + +def validate_preprocess_arguments( + do_rescale: Optional[bool] = None, + rescale_factor: Optional[float] = None, + do_normalize: Optional[bool] = None, + image_mean: Optional[Union[float, List[float]]] = None, + image_std: Optional[Union[float, List[float]]] = None, + do_pad: Optional[bool] = None, + size_divisibility: Optional[int] = None, + do_center_crop: Optional[bool] = None, + crop_size: Optional[Dict[str, int]] = None, + do_resize: Optional[bool] = None, + size: Optional[Dict[str, int]] = None, + resample: Optional["PILImageResampling"] = None, +): + """ + Checks validity of typically used arguments in an `ImageProcessor` `preprocess` method. + Raises `ValueError` if an incompatibility between arguments is caught. + Many incompatibilities are model-specific. `do_pad` sometimes needs `size_divisor`, + sometimes `size_divisibility`, and sometimes `size`. Newly added models and processors should follow + existing arguments when possible. + + """ + if do_rescale and rescale_factor is None: + raise ValueError("rescale_factor must be specified if do_rescale is True.") + + if do_pad and size_divisibility is None: + # Here, size_divisor might be passed as the value of size + raise ValueError( + "Depending on the model, size_divisibility, size_divisor, pad_size or size must be specified if do_pad is True." + ) + + if do_normalize and (image_mean is None or image_std is None): + raise ValueError("image_mean and image_std must both be specified if do_normalize is True.") + + if do_center_crop and crop_size is None: + raise ValueError("crop_size must be specified if do_center_crop is True.") + + if do_resize and (size is None or resample is None): + raise ValueError("size and resample must be specified if do_resize is True.") + + # TODO: Move BatchFeature to be imported by both image_processing_utils and image_processing_utils # We override the class string here, but logic is the same. class BatchFeature(BaseBatchFeature): @@ -543,13 +592,47 @@ def fetch_images(self, image_url_or_urls: Union[str, List[str]]): class BaseImageProcessor(ImageProcessingMixin): + _valid_processor_keys = None + def __init__(self, **kwargs): super().__init__(**kwargs) def __call__(self, images, **kwargs) -> BatchFeature: """Preprocess an image or a batch of images.""" + self._validate_inputs(images, **kwargs) return self.preprocess(images, **kwargs) + def _validate_preprocess_arguments(self, **kwargs): + """Check if the arguments passed to the preprocess method have compatible settings, e.g. 
that `size` is defined when `do_resize` is set to `True`.""" + validate_preprocess_arguments(**kwargs) + + def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False): + """Check if the images and segmentation maps are valid.""" + if not valid_images(images): + raise ValueError( + "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray." + ) + + if segmentation_maps is not None and not valid_images(segmentation_maps): + raise ValueError( + "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray." + ) + + if do_rescale and is_scaled_image(to_numpy_array(images[0])): + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + + def _validate_inputs(self, images, segmentation_maps=None, **kwargs): + """Check if the arguments passed to the preprocess method are valid.""" + if self._valid_processor_keys is None: + raise ValueError("Each image processor must define self._valid_processor_keys") + + validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) + self._validate_preprocess_arguments(**kwargs) + self._validate_image_inputs(images, segmentation_maps, do_rescale=kwargs.get("do_rescale", False)) + def preprocess(self, images, **kwargs) -> BatchFeature: raise NotImplementedError("Each image processor must implement its own preprocess method") diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py index e4a55b3455a344..a662307cdf9eef 100644 --- a/src/transformers/image_utils.py +++ b/src/transformers/image_utils.py @@ -337,47 +337,6 @@ def load_image(image: Union[str, "PIL.Image.Image"], timeout: Optional[float] = return image -def validate_preprocess_arguments( - do_rescale: Optional[bool] = None, - rescale_factor: Optional[float] = None, - do_normalize: Optional[bool] = None, - image_mean: Optional[Union[float, List[float]]] = None, - image_std: Optional[Union[float, List[float]]] = None, - do_pad: Optional[bool] = None, - size_divisibility: Optional[int] = None, - do_center_crop: Optional[bool] = None, - crop_size: Optional[Dict[str, int]] = None, - do_resize: Optional[bool] = None, - size: Optional[Dict[str, int]] = None, - resample: Optional["PILImageResampling"] = None, -): - """ - Checks validity of typically used arguments in an `ImageProcessor` `preprocess` method. - Raises `ValueError` if arguments incompatibility is caught. - Many incompatibilities are model-specific. `do_pad` sometimes needs `size_divisor`, - sometimes `size_divisibility`, and sometimes `size`. New models and processors added should follow - existing arguments when possible. - - """ - if do_rescale and rescale_factor is None: - raise ValueError("rescale_factor must be specified if do_rescale is True.") - - if do_pad and size_divisibility is None: - # Here, size_divisor might be passed as the value of size - raise ValueError( - "Depending on moel, size_divisibility, size_divisor, pad_size or size must be specified if do_pad is True." 
- ) - - if do_normalize and (image_mean is None or image_std is None): - raise ValueError("image_mean and image_std must both be specified if do_normalize is True.") - - if do_center_crop and crop_size is None: - raise ValueError("crop_size must be specified if do_center_crop is True.") - - if do_resize and (size is None or resample is None): - raise ValueError("size and resample must be specified if do_resize is True.") - - # In the future we can add a TF implementation here when we have TF models. class ImageFeatureExtractionMixin: """ @@ -759,11 +718,3 @@ def validate_annotations( "(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with " "the latter being a list of annotations in the COCO format." ) - - -def validate_kwargs(valid_processor_keys: List[str], captured_kwargs: List[str]): - unused_keys = set(captured_kwargs).difference(set(valid_processor_keys)) - if unused_keys: - unused_key_str = ", ".join(unused_keys) - # TODO raise a warning here instead of simply logging? - logger.warning(f"Unused or unrecognized kwargs: {unused_key_str}.") diff --git a/src/transformers/models/beit/image_processing_beit.py b/src/transformers/models/beit/image_processing_beit.py index 5e15fe645cf9d9..5e37a67feb3faf 100644 --- a/src/transformers/models/beit/image_processing_beit.py +++ b/src/transformers/models/beit/image_processing_beit.py @@ -28,12 +28,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging @@ -257,11 +253,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -418,37 +409,11 @@ def preprocess( image_std = image_std if image_std is not None else self.image_std do_reduce_labels = do_reduce_labels if do_reduce_labels is not None else self.do_reduce_labels - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - images = make_list_of_images(images) if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) - if segmentation_maps is not None and not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation_maps type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - images = [ self._preprocess_image( image=img, diff --git a/src/transformers/models/bit/image_processing_bit.py b/src/transformers/models/bit/image_processing_bit.py index c9d5c7a7594a49..e6b8c001347442 100644 --- a/src/transformers/models/bit/image_processing_bit.py +++ b/src/transformers/models/bit/image_processing_bit.py @@ -32,12 +32,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -274,29 +270,8 @@ def preprocess( image_std = image_std if image_std is not None else self.image_std do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # PIL RGBA images are converted to RGB if do_convert_rgb: images = [convert_to_rgb(image) for image in images] @@ -304,12 +279,6 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/blip/image_processing_blip.py b/src/transformers/models/blip/image_processing_blip.py index a65ccc2d9839b7..59031ade46fae2 100644 --- a/src/transformers/models/blip/image_processing_blip.py +++ b/src/transformers/models/blip/image_processing_blip.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -250,24 +246,6 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) # PIL RGBA images are converted to RGB if do_convert_rgb: images = [convert_to_rgb(image) for image in images] @@ -275,12 +253,6 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/bridgetower/image_processing_bridgetower.py b/src/transformers/models/bridgetower/image_processing_bridgetower.py index 8fc62ad3970fa0..05b39e3ab068fd 100644 --- a/src/transformers/models/bridgetower/image_processing_bridgetower.py +++ b/src/transformers/models/bridgetower/image_processing_bridgetower.py @@ -18,7 +18,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import PaddingMode, center_crop, pad, resize, to_channel_dimension_format from ...image_utils import ( OPENAI_CLIP_MEAN, @@ -29,11 +29,7 @@ get_image_size, infer_channel_dimension_format, is_batched, - is_scaled_image, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -224,6 +220,37 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, + do_rescale, + rescale_factor, + do_normalize, + image_mean, + image_std, + do_pad, + size_divisor, + do_center_crop, + crop_size, + do_resize, + size, + resample, + ): + # crop_size is used only if it is set, else size will be used. + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + size_divisibility=size_divisor, + do_center_crop=do_center_crop, + crop_size=crop_size, + do_resize=do_resize, + size=size, + resample=resample, + ) + # Copied from transformers.models.vilt.image_processing_vilt.ViltImageProcessor.resize def resize( self, @@ -484,40 +511,12 @@ def preprocess( size = size if size is not None else self.size size = get_size_dict(size, default_to_square=False) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if not is_batched(images): images = [images] - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - # Here, crop_size is used only if it is set, else size will be used. 
- validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=size_divisor, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if do_resize: images = [ self.resize( diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py index 60f40272bf9271..247dfe8af77017 100644 --- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py +++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py @@ -32,12 +32,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -265,37 +261,12 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) if do_convert_rgb: images = [convert_to_rgb(image) for image in images] # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/clip/image_processing_clip.py b/src/transformers/models/clip/image_processing_clip.py index fd2f8b3d532bbb..0db9d69a8076b8 100644 --- a/src/transformers/models/clip/image_processing_clip.py +++ b/src/transformers/models/clip/image_processing_clip.py @@ -32,12 +32,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -277,40 +273,14 @@ def preprocess( image_std = image_std if image_std is not None else self.image_std do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. 
Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - if do_convert_rgb: images = [convert_to_rgb(image) for image in images] # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py index e88bfc8fe230df..83343093690223 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py @@ -22,7 +22,7 @@ import numpy as np from ...feature_extraction_utils import BatchFeature -from ...image_processing_utils import BaseImageProcessor, get_size_dict +from ...image_processing_utils import BaseImageProcessor, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( PaddingMode, center_to_corners_format, @@ -44,13 +44,9 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, validate_annotations, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( TensorType, @@ -867,6 +863,22 @@ def __init__( "input_data_format", ] + # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor._validate_preprocess_arguments with Detr->ConditionalDetr + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample + ): + # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + @classmethod # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->ConditionalDetr def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): @@ -1315,25 +1327,6 @@ def preprocess( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. 
- validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - if annotations is not None and isinstance(annotations, dict): annotations = [annotations] @@ -1359,12 +1352,6 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/convnext/image_processing_convnext.py b/src/transformers/models/convnext/image_processing_convnext.py index 54060105f59eb2..3950f4ab2270db 100644 --- a/src/transformers/models/convnext/image_processing_convnext.py +++ b/src/transformers/models/convnext/image_processing_convnext.py @@ -32,12 +32,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -276,36 +272,11 @@ def preprocess( size = size if size is not None else self.size size = get_size_dict(size, default_to_square=False) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py index 5525eeeb8c58d5..07d6148b8906c6 100644 --- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py +++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py @@ -22,7 +22,7 @@ import numpy as np from ...feature_extraction_utils import BatchFeature -from ...image_processing_utils import BaseImageProcessor, get_size_dict +from ...image_processing_utils import BaseImageProcessor, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( PaddingMode, center_to_corners_format, @@ -44,13 +44,9 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, validate_annotations, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( TensorType, @@ -865,6 +861,22 @@ def __init__( "input_data_format", ] + # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor._validate_preprocess_arguments with Detr->DeformableDetr + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample + ): + # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + @classmethod # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->DeformableDetr def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): @@ -1313,25 +1325,6 @@ def preprocess( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - if annotations is not None and isinstance(annotations, dict): annotations = [annotations] @@ -1357,12 +1350,6 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/deit/image_processing_deit.py b/src/transformers/models/deit/image_processing_deit.py index 2a8ebb36377854..0b942a88ca23b7 100644 --- a/src/transformers/models/deit/image_processing_deit.py +++ b/src/transformers/models/deit/image_processing_deit.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -257,34 +253,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/deta/image_processing_deta.py b/src/transformers/models/deta/image_processing_deta.py index 45c5c6cb285a8f..0f13bc953c4204 100644 --- a/src/transformers/models/deta/image_processing_deta.py +++ b/src/transformers/models/deta/image_processing_deta.py @@ -20,7 +20,7 @@ import numpy as np from ...feature_extraction_utils import BatchFeature -from ...image_processing_utils import BaseImageProcessor, get_size_dict +from ...image_processing_utils import BaseImageProcessor, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( PaddingMode, center_to_corners_format, @@ -42,11 +42,9 @@ get_image_size, infer_channel_dimension_format, is_batched, - is_scaled_image, to_numpy_array, valid_images, validate_annotations, - validate_preprocess_arguments, ) from ...utils import ( is_flax_available, @@ -543,6 +541,21 @@ def __init__( self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD self.do_pad = do_pad + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample + ): + # pad() method pads to the maximum of (width, height). It does not need to be validated. 
+ validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_annotation with DETR->DETA def prepare_annotation( self, @@ -956,19 +969,6 @@ def preprocess( do_pad = self.do_pad if do_pad is None else do_pad format = self.format if format is None else format - # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - if not is_batched(images): images = [images] annotations = [annotations] if annotations is not None else None @@ -1000,12 +1000,6 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py index e0e59cbc7c40c6..ba8957f6cecdff 100644 --- a/src/transformers/models/detr/image_processing_detr.py +++ b/src/transformers/models/detr/image_processing_detr.py @@ -21,7 +21,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( PaddingMode, center_to_corners_format, @@ -43,13 +43,9 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, validate_annotations, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( TensorType, @@ -850,6 +846,21 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample + ): + # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + @classmethod def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): """ @@ -1285,25 +1296,6 @@ def preprocess( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. 
- validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - if annotations is not None and isinstance(annotations, dict): annotations = [annotations] @@ -1329,12 +1321,6 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/donut/image_processing_donut.py b/src/transformers/models/donut/image_processing_donut.py index 1c6e4723139046..f4515001567b96 100644 --- a/src/transformers/models/donut/image_processing_donut.py +++ b/src/transformers/models/donut/image_processing_donut.py @@ -18,7 +18,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( get_resize_output_image_size, pad, resize, to_channel_dimension_format, @@ -33,12 +33,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging from ...utils.import_utils import is_vision_available @@ -143,6 +139,23 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_pad, size, do_resize, resample + ): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + # There is no pad divisibility in this processor, but pad requires the size arg. + size_divisibility=size, + do_resize=do_resize, + size=size, + resample=resample, + ) + def align_long_axis( self, image: np.ndarray, @@ -407,35 +420,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=size, # There is no pad divisibility in this processor, but pad requires the size arg. - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/dpt/image_processing_dpt.py b/src/transformers/models/dpt/image_processing_dpt.py index 96f43a796e3886..79cc174cf19b18 100644 --- a/src/transformers/models/dpt/image_processing_dpt.py +++ b/src/transformers/models/dpt/image_processing_dpt.py @@ -29,14 +29,10 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, is_torch_available, is_torch_tensor, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -369,34 +365,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=size_divisor, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/efficientformer/image_processing_efficientformer.py b/src/transformers/models/efficientformer/image_processing_efficientformer.py index 38756f7c958f5d..9718acfccae2d6 100644 --- a/src/transformers/models/efficientformer/image_processing_efficientformer.py +++ b/src/transformers/models/efficientformer/image_processing_efficientformer.py @@ -32,11 +32,7 @@ PILImageResampling, infer_channel_dimension_format, is_batched, - is_scaled_image, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -255,37 +251,12 @@ def preprocess( size = size if size is not None else self.size size_dict = get_size_dict(size) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if not is_batched(images): images = [images] - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
- ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/efficientnet/image_processing_efficientnet.py b/src/transformers/models/efficientnet/image_processing_efficientnet.py index 4fd2364a3020c5..7292a931e8cc46 100644 --- a/src/transformers/models/efficientnet/image_processing_efficientnet.py +++ b/src/transformers/models/efficientnet/image_processing_efficientnet.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -316,34 +312,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/flava/image_processing_flava.py b/src/transformers/models/flava/image_processing_flava.py index d6a7c8080bb6b4..9362d8e5c7f1c6 100644 --- a/src/transformers/models/flava/image_processing_flava.py +++ b/src/transformers/models/flava/image_processing_flava.py @@ -30,12 +30,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -440,29 +436,9 @@ def _preprocess_image( input_data_format: Optional[ChannelDimension] = None, ) -> np.ndarray: """Preprocesses a single image.""" - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(image) @@ -672,14 +648,6 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - processed_images = [ self._preprocess_image( image=img, diff --git a/src/transformers/models/fuyu/image_processing_fuyu.py b/src/transformers/models/fuyu/image_processing_fuyu.py index ec5e1a36abb75c..c0b673f7460a34 100644 --- a/src/transformers/models/fuyu/image_processing_fuyu.py +++ b/src/transformers/models/fuyu/image_processing_fuyu.py @@ -19,7 +19,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature +from ...image_processing_utils import BaseImageProcessor, BatchFeature, validate_preprocess_arguments from ...image_transforms import ( pad, resize, @@ -35,7 +35,7 @@ is_valid_image, make_list_of_images, to_numpy_array, - validate_preprocess_arguments, + valid_images, ) from ...utils import ( TensorType, @@ -280,6 +280,38 @@ def __init__( "input_data_format", ] + def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False): + if isinstance(images, list) and any(isinstance(elem, list) and len(elem) >= 2 for elem in images): + raise ValueError("Multiple images for a single sample are not yet supported.") + + if not valid_images(images[0]): + raise ValueError( + "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray." + ) + + if is_scaled_image(to_numpy_array(images[0][0])) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_pad, size, do_resize, resample + ): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + # There is no pad divisibility in this processor, but pad requires the size arg. + size_divisibility=size, + do_resize=do_resize, + size=size, + resample=resample, + ) + def resize( self, image: np.ndarray, @@ -460,32 +492,11 @@ def preprocess( rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor patch_size = patch_size if patch_size is not None else self.patch_size - if isinstance(images, list) and any(isinstance(elem, list) and len(elem) >= 2 for elem in images): - raise ValueError("Multiple images for a single sample are not yet supported.") - batch_images = make_list_of_list_of_images(images) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=size, # There is no pad divisibility in this processor, but pad requires the size arg. - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. batch_images = [[to_numpy_array(image) for image in images] for images in batch_images] - if is_scaled_image(batch_images[0][0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. 
If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(batch_images[0][0]) diff --git a/src/transformers/models/glpn/image_processing_glpn.py b/src/transformers/models/glpn/image_processing_glpn.py index 7577b4eeb3d0c2..26d086f2c6a873 100644 --- a/src/transformers/models/glpn/image_processing_glpn.py +++ b/src/transformers/models/glpn/image_processing_glpn.py @@ -19,19 +19,15 @@ import numpy as np import PIL.Image -from ...image_processing_utils import BaseImageProcessor, BatchFeature +from ...image_processing_utils import BaseImageProcessor, BatchFeature, validate_preprocess_arguments from ...image_transforms import resize, to_channel_dimension_format from ...image_utils import ( ChannelDimension, PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -83,6 +79,15 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments(self, do_resize, size_divisor, resample): + # rescale() uses a constant rescale_factor. It does not need to be validated + # with a rescale_factor. + validate_preprocess_arguments( + do_resize=do_resize, + size=size_divisor, # size_divisor is used as a parameter for optimal resizing instead of size. + resample=resample, + ) + def resize( self, image: np.ndarray, @@ -187,31 +192,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - # Here, the rescale() method uses a constant rescale_factor. It does not need to be validated - # with a rescale_factor. - validate_preprocess_arguments( - do_resize=do_resize, - size=size_divisor, # Here, size_divisor is used as a parameter for optimal resizing instead of size. - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(img) for img in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py index 8b39d6801ca000..908448ab104a00 100644 --- a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py +++ b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py @@ -22,7 +22,7 @@ import numpy as np from ...feature_extraction_utils import BatchFeature -from ...image_processing_utils import BaseImageProcessor, get_size_dict +from ...image_processing_utils import BaseImageProcessor, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( PaddingMode, center_to_corners_format, @@ -42,13 +42,9 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, validate_annotations, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( ExplicitEnum, @@ -872,6 +868,22 @@ def __init__( "input_data_format", ] + # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor._validate_preprocess_arguments with Detr->GroundingDino + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample + ): + # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + @classmethod # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->GroundingDino def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): @@ -1320,25 +1332,6 @@ def preprocess( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - if annotations is not None and isinstance(annotations, dict): annotations = [annotations] @@ -1364,12 +1357,6 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/idefics2/image_processing_idefics2.py b/src/transformers/models/idefics2/image_processing_idefics2.py index ac9df68871eee2..3a530af00b4f1c 100644 --- a/src/transformers/models/idefics2/image_processing_idefics2.py +++ b/src/transformers/models/idefics2/image_processing_idefics2.py @@ -32,7 +32,6 @@ is_valid_image, to_numpy_array, valid_images, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -246,6 +245,19 @@ def __init__( self.do_pad = do_pad self.do_image_splitting = do_image_splitting + def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False): + if not valid_images(images[0]): + raise ValueError( + "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " + "torch.Tensor, tf.Tensor or jax.ndarray." + ) + + if is_scaled_image(to_numpy_array(images[0][0])) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + def resize( self, image: np.ndarray, @@ -505,35 +517,12 @@ def preprocess( images_list = make_list_of_images(images) - if not valid_images(images_list[0]): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - if do_convert_rgb: images_list = [[convert_to_rgb(image) for image in images] for images in images_list] # All transformations expect numpy arrays. images_list = [[to_numpy_array(image) for image in images] for images in images_list] - if is_scaled_image(images_list[0][0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images_list[0][0]) diff --git a/src/transformers/models/imagegpt/image_processing_imagegpt.py b/src/transformers/models/imagegpt/image_processing_imagegpt.py index fecdd061d4e40e..39c828c3549a0b 100644 --- a/src/transformers/models/imagegpt/image_processing_imagegpt.py +++ b/src/transformers/models/imagegpt/image_processing_imagegpt.py @@ -18,19 +18,15 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import rescale, resize, to_channel_dimension_format from ...image_utils import ( ChannelDimension, ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -116,6 +112,11 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments(self, do_resize, size, resample): + # normalize() uses a constant factor to divide pixel values. 
+ # Hence, the method does not need image_mean and image_std. + validate_preprocess_arguments(do_resize=do_resize, size=size, resample=resample) + # Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize def resize( self, @@ -251,34 +252,12 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - # Here, normalize() is using a constant factor to divide pixel values. - # hence, the method does not need iamge_mean and image_std. - validate_preprocess_arguments( - do_resize=do_resize, - size=size, - resample=resample, - ) - if do_color_quantize and clusters is None: raise ValueError("Clusters must be specified if do_color_quantize is True.") # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_normalize: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If you wish to do this, " - "make sure to set `do_normalize` to `False` and that pixel values are between [-1, 1].", - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py index e2369911941388..141e0050f42271 100644 --- a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py @@ -27,9 +27,6 @@ infer_channel_dimension_format, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_pytesseract_available, is_vision_available, logging, requires_backends @@ -257,19 +254,6 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays.
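[Editor's note] The inline blocks deleted here and above all reduce to presence checks on flag/argument pairs, which is what the shared validator enforces. A usage sketch; the error wording is approximated from the messages removed in this patch:

from transformers.image_processing_utils import validate_preprocess_arguments
from transformers.image_utils import PILImageResampling

# Consistent flag/argument pairs pass silently.
validate_preprocess_arguments(
    do_resize=True, size={"height": 224, "width": 224}, resample=PILImageResampling.BILINEAR
)

# A flag without its companion argument raises a ValueError.
try:
    validate_preprocess_arguments(do_rescale=True, rescale_factor=None)
except ValueError as err:
    print(err)  # e.g. "Rescale factor must be specified if do_rescale is True." (wording approximate)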
images = [to_numpy_array(image) for image in images] diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py index 8c5356993f16be..8fb77678a1da4b 100644 --- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_pytesseract_available, is_vision_available, logging, requires_backends @@ -316,33 +312,9 @@ def preprocess( tesseract_config = tesseract_config if tesseract_config is not None else self.tesseract_config images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/levit/image_processing_levit.py b/src/transformers/models/levit/image_processing_levit.py index b861a4ebf8b2dc..dead9ed24d8305 100644 --- a/src/transformers/models/levit/image_processing_levit.py +++ b/src/transformers/models/levit/image_processing_levit.py @@ -31,12 +31,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -271,34 +267,9 @@ def preprocess( crop_size = get_size_dict(crop_size, param_name="crop_size") images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
- ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/llava_next/image_processing_llava_next.py b/src/transformers/models/llava_next/image_processing_llava_next.py index 3934927a2e7957..89a840e816e26c 100644 --- a/src/transformers/models/llava_next/image_processing_llava_next.py +++ b/src/transformers/models/llava_next/image_processing_llava_next.py @@ -35,11 +35,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -534,37 +531,12 @@ def preprocess( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - if do_convert_rgb: images = [convert_to_rgb(image) for image in images] # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py index 5440584d25f28f..7643e7c3522c1f 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former.py +++ b/src/transformers/models/mask2former/image_processing_mask2former.py @@ -36,11 +36,7 @@ get_image_size, infer_channel_dimension_format, is_batched, - is_scaled_image, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( IMAGENET_DEFAULT_MEAN, @@ -627,11 +623,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -728,31 +719,6 @@ def preprocess( ignore_index = ignore_index if ignore_index is not None else self.ignore_index reduce_labels = reduce_labels if reduce_labels is not None else self.reduce_labels - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - - if segmentation_maps is not None and not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - if not is_batched(images): images = [images] segmentation_maps = [segmentation_maps] if segmentation_maps is not None else None diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index 3c854b35c76edb..5ae92b36fefd31 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -35,12 +35,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( IMAGENET_DEFAULT_MEAN, @@ -634,11 +630,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -745,30 +736,6 @@ def preprocess( ignore_index = ignore_index if ignore_index is not None else self.ignore_index do_reduce_labels = do_reduce_labels if do_reduce_labels is not None else self.do_reduce_labels - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - - if segmentation_maps is not None and not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - images = make_list_of_images(images) if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) diff --git a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py index 086ab892492065..bf69c306f91c47 100644 --- a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +++ b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py @@ -31,12 +31,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -262,35 +258,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py index 44b784d2a7c3b8..17971fde6ecff2 100644 --- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py @@ -31,12 +31,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, is_torch_tensor, logging @@ -266,34 +262,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
- ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/mobilevit/image_processing_mobilevit.py b/src/transformers/models/mobilevit/image_processing_mobilevit.py index 8cc79a283e05af..13d8297bb3cb8a 100644 --- a/src/transformers/models/mobilevit/image_processing_mobilevit.py +++ b/src/transformers/models/mobilevit/image_processing_mobilevit.py @@ -25,12 +25,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging @@ -243,11 +239,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) @@ -383,35 +374,11 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - if segmentation_maps is not None and not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - images = [ self._preprocess_image( image=img, diff --git a/src/transformers/models/nougat/image_processing_nougat.py b/src/transformers/models/nougat/image_processing_nougat.py index 49913d5baa080b..6a456b5f5b942f 100644 --- a/src/transformers/models/nougat/image_processing_nougat.py +++ b/src/transformers/models/nougat/image_processing_nougat.py @@ -18,7 +18,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( get_resize_output_image_size, pad, @@ -34,12 +34,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging from ...utils.import_utils import is_cv2_available, is_vision_available @@ -145,6 +141,33 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, + do_rescale, + rescale_factor, + do_normalize, + image_mean, + image_std, + do_pad, + size_divisibility, + do_resize, + size, + resample, + ): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + # There is no pad divisibility in this processor, but pad requires the size arg. + size_divisibility=size, + do_resize=do_resize, + size=size, + resample=resample, + ) + def python_find_non_zero(self, image: np.array): """This is a reimplementation of a findNonZero function equivalent to cv2.""" non_zero_indices = np.column_stack(np.nonzero(image)) @@ -461,35 +484,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=size, # There is no pad divisibility in this processor, but pad requires the size arg. - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/oneformer/image_processing_oneformer.py b/src/transformers/models/oneformer/image_processing_oneformer.py index 9f865f8efd9b94..0a285af71822c9 100644 --- a/src/transformers/models/oneformer/image_processing_oneformer.py +++ b/src/transformers/models/oneformer/image_processing_oneformer.py @@ -38,12 +38,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( IMAGENET_DEFAULT_MEAN, @@ -616,11 +612,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -729,31 +720,6 @@ def preprocess( ignore_index = ignore_index if ignore_index is not None else self.ignore_index do_reduce_labels = do_reduce_labels if do_reduce_labels is not None else self.do_reduce_labels - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - - if segmentation_maps is not None and not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - images = make_list_of_images(images) if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) diff --git a/src/transformers/models/owlv2/image_processing_owlv2.py b/src/transformers/models/owlv2/image_processing_owlv2.py index 06ac984c7d866e..e370475d25e93e 100644 --- a/src/transformers/models/owlv2/image_processing_owlv2.py +++ b/src/transformers/models/owlv2/image_processing_owlv2.py @@ -19,7 +19,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature +from ...image_processing_utils import BaseImageProcessor, BatchFeature, validate_preprocess_arguments from ...image_transforms import ( center_to_corners_format, pad, @@ -33,12 +33,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( TensorType, @@ -248,6 +244,20 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments(self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, size): + # Here, pad and resize methods are different from the rest of image processors + # as they don't have any resampling in resize() + # or pad size in pad() (the maximum of (height, width) is taken instead). + # hence, these arguments don't need to be passed in validate_preprocess_arguments. 
+ validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + size=size, + ) + def pad( self, image: np.array, @@ -416,35 +426,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - # Here, pad and resize methods are different from the rest of image processors - # as they don't have any resampling in resize() - # or pad size in pad() (the maximum of (height, width) is taken instead). - # hence, these arguments don't need to be passed in validate_preprocess_arguments. - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - size=size, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/owlvit/image_processing_owlvit.py b/src/transformers/models/owlvit/image_processing_owlvit.py index 25ea5f2720d527..b3a92a098cfd28 100644 --- a/src/transformers/models/owlvit/image_processing_owlvit.py +++ b/src/transformers/models/owlvit/image_processing_owlvit.py @@ -34,12 +34,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, logging @@ -368,35 +364,9 @@ def preprocess( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
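[Editor's note] Per the patch's intent, nothing changes for callers: validation still runs on every preprocess call, only now from a single place. A quick smoke test against one of the models touched here (the checkpoint choice is illustrative):

import numpy as np
from transformers import AutoImageProcessor

processor = AutoImageProcessor.from_pretrained("google/owlv2-base-patch16-ensemble")
image = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy HWC image
inputs = processor(images=image, return_tensors="np")
print(inputs["pixel_values"].shape)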
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/perceiver/image_processing_perceiver.py b/src/transformers/models/perceiver/image_processing_perceiver.py index 02dd527e437be7..62aa61442f888b 100644 --- a/src/transformers/models/perceiver/image_processing_perceiver.py +++ b/src/transformers/models/perceiver/image_processing_perceiver.py @@ -28,12 +28,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -303,35 +299,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/poolformer/image_processing_poolformer.py b/src/transformers/models/poolformer/image_processing_poolformer.py index dcdb1591b1c31b..daa23793005ef2 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer.py +++ b/src/transformers/models/poolformer/image_processing_poolformer.py @@ -31,12 +31,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -311,35 +307,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/pvt/image_processing_pvt.py b/src/transformers/models/pvt/image_processing_pvt.py index f3907edf3af093..7807d63f23e206 100644 --- a/src/transformers/models/pvt/image_processing_pvt.py +++ b/src/transformers/models/pvt/image_processing_pvt.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -233,33 +229,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/sam/image_processing_sam.py b/src/transformers/models/sam/image_processing_sam.py index ccdc72fc7baadb..c67f306b42ff0a 100644 --- a/src/transformers/models/sam/image_processing_sam.py +++ b/src/transformers/models/sam/image_processing_sam.py @@ -20,7 +20,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import convert_to_rgb, pad, resize, to_channel_dimension_format from ...image_utils import ( IMAGENET_DEFAULT_MEAN, @@ -30,12 +30,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( TensorType, @@ -182,6 +178,33 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, + do_rescale, + rescale_factor, + do_normalize, + image_mean, + image_std, + do_pad, + size_divisibility, + do_resize, + size, + resample, + pad_size, + ): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + size_divisibility=pad_size, # _preprocess needs do_pad and pad_size. + do_resize=do_resize, + size=size, + resample=resample, + ) + def pad_image( self, image: np.ndarray, @@ -335,12 +358,6 @@ def _preprocess_image( # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. 
If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: input_data_format = infer_channel_dimension_format(image) @@ -512,35 +529,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) - if not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=pad_size, # Here _preprocess needs do_pad and pad_size. - do_resize=do_resize, - size=size, - resample=resample, - ) - images, original_sizes, reshaped_input_sizes = zip( *( self._preprocess_image( diff --git a/src/transformers/models/segformer/image_processing_segformer.py b/src/transformers/models/segformer/image_processing_segformer.py index 1fef9645549878..acd9f99790e073 100644 --- a/src/transformers/models/segformer/image_processing_segformer.py +++ b/src/transformers/models/segformer/image_processing_segformer.py @@ -28,12 +28,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging @@ -251,11 +247,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -398,27 +389,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - images = [ self._preprocess_image( image=img, diff --git a/src/transformers/models/seggpt/image_processing_seggpt.py b/src/transformers/models/seggpt/image_processing_seggpt.py index 80fb94cdc7aaf4..6456702b08ce14 100644 --- a/src/transformers/models/seggpt/image_processing_seggpt.py +++ b/src/transformers/models/seggpt/image_processing_seggpt.py @@ -28,10 +28,8 @@ PILImageResampling, get_channel_dimension_axis, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, ) from ...utils import TensorType, is_torch_available, logging, requires_backends @@ -350,30 +348,9 @@ def _preprocess_step( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - if do_resize and size is None: - raise ValueError("Size must be specified if do_resize is True.") - - if do_rescale and rescale_factor is None: - raise ValueError("Rescale factor must be specified if do_rescale is True.") - - if do_normalize and (image_mean is None or image_std is None): - raise ValueError("Image mean and std must be specified if do_normalize is True.") - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None and not is_mask: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/siglip/image_processing_siglip.py b/src/transformers/models/siglip/image_processing_siglip.py index 5f24ffb0a2a8b1..6d2668a3442302 100644 --- a/src/transformers/models/siglip/image_processing_siglip.py +++ b/src/transformers/models/siglip/image_processing_siglip.py @@ -28,12 +28,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -189,32 +185,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
- ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index fbbb717570cb70..5bddd2b8213385 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -24,7 +24,6 @@ ChannelDimension, ImageInput, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -235,12 +234,6 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/swin2sr/image_processing_swin2sr.py b/src/transformers/models/swin2sr/image_processing_swin2sr.py index a126e6eee5e8d4..a1d169daa6c6b5 100644 --- a/src/transformers/models/swin2sr/image_processing_swin2sr.py +++ b/src/transformers/models/swin2sr/image_processing_swin2sr.py @@ -18,18 +18,14 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature +from ...image_processing_utils import BaseImageProcessor, BatchFeature, validate_preprocess_arguments from ...image_transforms import get_image_size, pad, to_channel_dimension_format from ...image_utils import ( ChannelDimension, ImageInput, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -77,6 +73,14 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments(self, do_rescale, rescale_factor, do_pad, size_divisibility, pad_size): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_pad=do_pad, + size_divisibility=pad_size, # pad function simply requires pad_size. + ) + def pad( self, image: np.ndarray, @@ -172,29 +176,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_pad=do_pad, - size_divisibility=pad_size, # Here the pad function simply requires pad_size. - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
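[Editor's note] Several of the new overrides (Swin2SR here; SAM, Nougat, and TVP elsewhere in this patch) reuse the validator's generic size_divisibility slot for their own pad-size argument. Judging by the inline comments ("pad function simply requires pad_size", "There is no pad divisibility in this processor"), this appears to be a presence check only, not an actual divisibility constraint. A sketch with illustrative values:

from transformers.image_processing_utils import validate_preprocess_arguments

pad_size = 8  # a Swin2SR-style pad size; the value itself is illustrative
validate_preprocess_arguments(
    do_rescale=True,
    rescale_factor=1 / 255,
    do_pad=True,
    size_divisibility=pad_size,  # presence is what gets validated here
)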
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/tvlt/image_processing_tvlt.py b/src/transformers/models/tvlt/image_processing_tvlt.py index f13101c15a9615..8793dae7873e11 100644 --- a/src/transformers/models/tvlt/image_processing_tvlt.py +++ b/src/transformers/models/tvlt/image_processing_tvlt.py @@ -30,12 +30,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, is_valid_image, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -233,29 +229,9 @@ def _preprocess_image( input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: """Preprocesses a single image.""" - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: input_data_format = infer_channel_dimension_format(image) @@ -377,14 +353,6 @@ def preprocess( patch_size = patch_size if patch_size is not None else self.patch_size num_frames = num_frames if patch_size is not None else self.num_frames - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(videos): - raise ValueError( - "Invalid image or video type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - videos = make_batched(videos) # Check number of frames is fewer than maximum frames diff --git a/src/transformers/models/tvp/image_processing_tvp.py b/src/transformers/models/tvp/image_processing_tvp.py index 18600ee5fbe7f3..51356df67d525a 100644 --- a/src/transformers/models/tvp/image_processing_tvp.py +++ b/src/transformers/models/tvp/image_processing_tvp.py @@ -18,7 +18,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( PaddingMode, flip_channel_order, @@ -33,11 +33,10 @@ ImageInput, PILImageResampling, get_image_size, + is_scaled_image, is_valid_image, to_numpy_array, valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -195,6 +194,49 @@ def __init__( "input_data_format", ] + def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False): + if not valid_images(images[0]): + raise ValueError( + "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " + "torch.Tensor, tf.Tensor or jax.ndarray." + ) + + if is_scaled_image(to_numpy_array(images[0][0])) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
+ ) + + def _validate_preprocess_arguments( + self, + do_rescale, + rescale_factor, + do_normalize, + image_mean, + image_std, + do_pad, + pad_size, + do_center_crop, + crop_size, + do_resize, + size, + resample, + ): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + size_divisibility=pad_size, # pad() method simply requires the pad_size argument. + do_center_crop=do_center_crop, + crop_size=crop_size, + do_resize=do_resize, + size=size, + resample=resample, + ) + def resize( self, image: np.ndarray, @@ -309,21 +351,6 @@ def _preprocess_image( ) -> np.ndarray: """Preprocesses a single image.""" - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=pad_size, # here the pad() method simply requires the pad_size argument. - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. image = to_numpy_array(image) @@ -459,14 +486,6 @@ def preprocess( crop_size = crop_size if crop_size is not None else self.crop_size crop_size = get_size_dict(crop_size, param_name="crop_size") - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(videos): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - videos = make_batched(videos) videos = [ diff --git a/src/transformers/models/videomae/image_processing_videomae.py b/src/transformers/models/videomae/image_processing_videomae.py index 6563d69c6503ea..06ce8f2413507e 100644 --- a/src/transformers/models/videomae/image_processing_videomae.py +++ b/src/transformers/models/videomae/image_processing_videomae.py @@ -35,8 +35,6 @@ is_valid_image, to_numpy_array, valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -209,19 +207,6 @@ def _preprocess_image( input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: """Preprocesses a single image.""" - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. image = to_numpy_array(image) @@ -328,8 +313,6 @@ def preprocess( crop_size = crop_size if crop_size is not None else self.crop_size crop_size = get_size_dict(crop_size, param_name="crop_size") - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if not valid_images(videos): raise ValueError( "Invalid image type. 
Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/vilt/image_processing_vilt.py b/src/transformers/models/vilt/image_processing_vilt.py index 42e5b3f439d6aa..4715db0c4c0471 100644 --- a/src/transformers/models/vilt/image_processing_vilt.py +++ b/src/transformers/models/vilt/image_processing_vilt.py @@ -18,7 +18,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import PaddingMode, pad, resize, to_channel_dimension_format from ...image_utils import ( IMAGENET_STANDARD_MEAN, @@ -28,12 +28,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -209,6 +205,22 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample + ): + # pad() method does not require any additional argument as it takes the maximum of (height, width). + # Hence, it does not need to be passed to a validate_preprocess_arguments() method. + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + @classmethod def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): """ @@ -433,36 +445,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - # Here the pad() method does not require any additional argument as it takes the maximum of (height, width). - # Hence, it does not need to be passed to a validate_preprocess_arguments() method. - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
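[Editor's note] Idefics2 and TVP above, and ViViT below, add the second hook, _validate_image_inputs, because their inputs are nested (a batch of frame or image lists), so the type check and the already-rescaled heuristic must index one level deeper than the flat-list default. A runnable sketch of that override shape, using the real helpers from transformers.image_utils (the class name is hypothetical, and print stands in for logger.warning_once):

import numpy as np

from transformers.image_utils import is_scaled_image, to_numpy_array, valid_images


class NestedInputValidatorSketch:
    def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False):
        # images is a list of lists: validate the first inner list and probe
        # the first frame for the already-rescaled heuristic.
        if not valid_images(images[0]):
            raise ValueError("Invalid image type for nested inputs.")
        if is_scaled_image(to_numpy_array(images[0][0])) and do_rescale:
            print("warning: inputs already look rescaled")


frames = [np.random.rand(224, 224, 3) for _ in range(4)]  # pixel values already in [0, 1]
NestedInputValidatorSketch()._validate_image_inputs([frames], do_rescale=True)  # prints the warning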
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index 4c7d8de714f72d..7767c9866b176e 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -232,33 +228,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py index 4bb3f70b49bb5b..e3c50182a5deed 100644 --- a/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py @@ -32,12 +32,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -276,26 +272,6 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # PIL RGBA images are converted to RGB if do_convert_rgb: images = [convert_to_rgb(image) for image in images] @@ -303,12 +279,6 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/vitmatte/image_processing_vitmatte.py b/src/transformers/models/vitmatte/image_processing_vitmatte.py index d7310bc0dd26ba..b945484fefb718 100644 --- a/src/transformers/models/vitmatte/image_processing_vitmatte.py +++ b/src/transformers/models/vitmatte/image_processing_vitmatte.py @@ -27,12 +27,9 @@ ImageInput, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -213,39 +210,16 @@ def preprocess( images = make_list_of_images(images) trimaps = make_list_of_images(trimaps, expected_ndims=2) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if not valid_images(trimaps): raise ValueError( "Invalid trimap type. Must be of type PIL.Image.Image, numpy.ndarray, " "torch.Tensor, tf.Tensor or jax.ndarray." ) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=size_divisibility, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] trimaps = [to_numpy_array(trimap) for trimap in trimaps] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/vivit/image_processing_vivit.py b/src/transformers/models/vivit/image_processing_vivit.py index 9b62aedc234e88..cd019d2e5a5401 100644 --- a/src/transformers/models/vivit/image_processing_vivit.py +++ b/src/transformers/models/vivit/image_processing_vivit.py @@ -38,8 +38,6 @@ is_valid_image, to_numpy_array, valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import logging @@ -156,6 +154,47 @@ def __init__( "input_data_format", ] + def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False): + if not valid_images(images[0]): + raise ValueError( + "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray." + ) + + if is_scaled_image(to_numpy_array(images[0][0])) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
+ ) + + def _validate_preprocess_arguments( + self, + do_rescale, + rescale_factor, + do_normalize, + image_mean, + image_std, + do_center_crop, + crop_size, + do_resize, + size, + resample, + offset, + ): + super()._validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_center_crop=do_center_crop, + crop_size=crop_size, + do_resize=do_resize, + size=size, + resample=resample, + ) + if offset and not do_rescale: + raise ValueError("For offset, do_rescale must also be set to True.") + def resize( self, image: np.ndarray, @@ -260,31 +299,9 @@ def _preprocess_image( ) -> np.ndarray: """Preprocesses a single image.""" - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - - if offset and not do_rescale: - raise ValueError("For offset, do_rescale must also be set to True.") - # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: input_data_format = infer_channel_dimension_format(image) @@ -386,14 +403,6 @@ def preprocess( crop_size = crop_size if crop_size is not None else self.crop_size crop_size = get_size_dict(crop_size, param_name="crop_size") - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(videos): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - videos = make_batched(videos) videos = [ diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py index b74819c7a1c91b..04d430298b72e8 100644 --- a/src/transformers/models/yolos/image_processing_yolos.py +++ b/src/transformers/models/yolos/image_processing_yolos.py @@ -20,7 +20,7 @@ import numpy as np from ...feature_extraction_utils import BatchFeature -from ...image_processing_utils import BaseImageProcessor, get_size_dict +from ...image_processing_utils import BaseImageProcessor, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( PaddingMode, center_to_corners_format, @@ -42,13 +42,9 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, validate_annotations, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( TensorType, @@ -772,6 +768,21 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample + ): + # Here the pad() method pads using the max of (width, height) and does not need to be validated. 
+ validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + @classmethod # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->Yolos def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): @@ -1213,27 +1224,9 @@ def preprocess( ) do_pad = self.do_pad if do_pad is None else do_pad format = self.format if format is None else format - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - # Here the pad() method pads using the max of (width, height) and does not need to be validated. - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - if annotations is not None and isinstance(annotations, dict): annotations = [annotations] @@ -1259,12 +1252,6 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0])
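The pattern repeated across the hunks above is the point of the patch: the copy-pasted `validate_preprocess_arguments(...)` / `valid_images(...)` calls inside each model's `preprocess()` are replaced by overridable hooks (`_validate_preprocess_arguments`, `_validate_image_inputs`) that the base image processor invokes once, up front. Below is a minimal standalone Python sketch of that hook pattern; the simplified signatures, the reduced set of checks, and the `VivitLikeImageProcessor` class name are illustrative assumptions for this sketch, not the actual transformers API.

from typing import Optional


def validate_preprocess_arguments(
    do_rescale: bool = False,
    rescale_factor: Optional[float] = None,
    do_normalize: bool = False,
    image_mean=None,
    image_std=None,
    do_resize: bool = False,
    size: Optional[dict] = None,
    resample=None,
):
    # Each enabled transform must come with the arguments it needs.
    # (The real helper checks more flag/argument pairs, e.g.
    # do_pad/size_divisibility and do_center_crop/crop_size.)
    if do_rescale and rescale_factor is None:
        raise ValueError("rescale_factor must be specified if do_rescale is True.")
    if do_normalize and (image_mean is None or image_std is None):
        raise ValueError("image_mean and image_std must be specified if do_normalize is True.")
    if do_resize and (size is None or resample is None):
        raise ValueError("size and resample must be specified if do_resize is True.")


class BaseImageProcessor:
    def _validate_preprocess_arguments(self, **kwargs):
        # Default hook: forward everything to the shared checker.
        validate_preprocess_arguments(**kwargs)

    def preprocess(self, images, **kwargs):
        # Validation now runs once here, instead of being duplicated in
        # every model's preprocess() / _preprocess_image().
        self._validate_preprocess_arguments(**kwargs)
        return images  # actual transforms elided in this sketch


class VivitLikeImageProcessor(BaseImageProcessor):
    # Models with extra constraints extend the hook, as the vivit hunk does:
    # run the shared checks via super(), then enforce the model-specific rule.
    def _validate_preprocess_arguments(self, offset=False, do_rescale=False, **kwargs):
        super()._validate_preprocess_arguments(do_rescale=do_rescale, **kwargs)
        if offset and not do_rescale:
            raise ValueError("For offset, do_rescale must also be set to True.")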
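Sketch usage, assuming the names above: `VivitLikeImageProcessor().preprocess(images, offset=True, do_rescale=False)` now fails fast with the offset error before any transform runs. Narrowing overrides work the same way in the hunks above: ViLT and YOLOS drop the padding arguments from their hook because their `pad()` derives the target size from the batch's maximum (height, width), while TVP maps its `pad_size` onto the checker's `size_divisibility` parameter when forwarding.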