diff --git a/src/transformers/image_processing_utils.py b/src/transformers/image_processing_utils.py
index 70f1a339de706a..287f10a630a77c 100644
--- a/src/transformers/image_processing_utils.py
+++ b/src/transformers/image_processing_utils.py
@@ -26,7 +26,7 @@
 from .dynamic_module_utils import custom_object_save
 from .feature_extraction_utils import BatchFeature as BaseBatchFeature
 from .image_transforms import center_crop, normalize, rescale
-from .image_utils import ChannelDimension
+from .image_utils import ChannelDimension, PILImageResampling, is_scaled_image, to_numpy_array, valid_images
 from .utils import (
     IMAGE_PROCESSOR_NAME,
     PushToHubMixin,
@@ -47,6 +47,55 @@
 logger = logging.get_logger(__name__)


+def validate_kwargs(valid_processor_keys: List[str], captured_kwargs: List[str]):
+    unused_keys = set(captured_kwargs).difference(set(valid_processor_keys))
+    if unused_keys:
+        unused_key_str = ", ".join(unused_keys)
+        # TODO raise a warning here instead of simply logging?
+        logger.warning(f"Unused or unrecognized kwargs: {unused_key_str}.")
+
+
+def validate_preprocess_arguments(
+    do_rescale: Optional[bool] = None,
+    rescale_factor: Optional[float] = None,
+    do_normalize: Optional[bool] = None,
+    image_mean: Optional[Union[float, List[float]]] = None,
+    image_std: Optional[Union[float, List[float]]] = None,
+    do_pad: Optional[bool] = None,
+    size_divisibility: Optional[int] = None,
+    do_center_crop: Optional[bool] = None,
+    crop_size: Optional[Dict[str, int]] = None,
+    do_resize: Optional[bool] = None,
+    size: Optional[Dict[str, int]] = None,
+    resample: Optional["PILImageResampling"] = None,
+):
+    """
+    Checks the validity of commonly used arguments of an `ImageProcessor` `preprocess` method.
+    Raises a `ValueError` if an incompatible combination of arguments is found.
+    Many incompatibilities are model-specific: `do_pad` sometimes needs `size_divisor`,
+    sometimes `size_divisibility`, and sometimes `size`. New models and image processors
+    should reuse the existing argument names whenever possible.
+    """
+    if do_rescale and rescale_factor is None:
+        raise ValueError("rescale_factor must be specified if do_rescale is True.")
+
+    if do_pad and size_divisibility is None:
+        # Here, size_divisor might be passed as the value of size.
+        raise ValueError(
+            "Depending on the model, size_divisibility, size_divisor, pad_size or size must be specified if do_pad is True."
+        )
+
+    if do_normalize and (image_mean is None or image_std is None):
+        raise ValueError("image_mean and image_std must both be specified if do_normalize is True.")
+
+    if do_center_crop and crop_size is None:
+        raise ValueError("crop_size must be specified if do_center_crop is True.")
+
+    if do_resize and (size is None or resample is None):
+        raise ValueError("size and resample must be specified if do_resize is True.")
+
+
 # TODO: Move BatchFeature to be imported by both image_processing_utils and image_processing_utils
 # We override the class string here, but logic is the same.
 class BatchFeature(BaseBatchFeature):
@@ -543,13 +592,51 @@ def fetch_images(self, image_url_or_urls: Union[str, List[str]]):


 class BaseImageProcessor(ImageProcessingMixin):
+    _valid_processor_keys = None
+
     def __init__(self, **kwargs):
         super().__init__(**kwargs)

     def __call__(self, images, **kwargs) -> BatchFeature:
         """Preprocess an image or a batch of images."""
+        self._validate_inputs(images, **kwargs)
         return self.preprocess(images, **kwargs)

+    def _validate_preprocess_arguments(self, **kwargs):
+        """Check if the arguments passed to the preprocess method have compatible settings, e.g. if `size` is
+        defined when `do_resize` is set to True."""
+        validate_preprocess_arguments(**kwargs)
+
+    def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False):
+        """Check if the images and segmentation maps are valid."""
+        if not valid_images(images):
+            raise ValueError(
+                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray."
+            )
+
+        if segmentation_maps is not None and not valid_images(segmentation_maps):
+            raise ValueError(
+                "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray."
+            )
+
+        if do_rescale and is_scaled_image(to_numpy_array(images[0])):
+            logger.warning_once(
+                "It looks like you are trying to rescale already rescaled images. If the input"
+                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
+            )
+
+    def _validate_inputs(self, images, segmentation_maps=None, **kwargs):
+        """Check if the arguments passed to the preprocess method are valid."""
+        if self._valid_processor_keys is None:
+            raise ValueError("Each image processor must define `self._valid_processor_keys`.")
+
+        # Unknown kwargs are reported with a warning by `validate_kwargs`, keeping the
+        # previous per-model behavior (no hard error) for backward compatibility.
+        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
+        self._validate_preprocess_arguments(**kwargs)
+        self._validate_image_inputs(images, segmentation_maps, do_rescale=kwargs.get("do_rescale", False))
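+
+    # Illustrative sketch of the expected wiring (documentation only; `MyImageProcessor`
+    # is a hypothetical subclass, not part of the library): a concrete image processor
+    # lists the kwargs its `preprocess` method accepts and, when its argument names
+    # differ from the generic ones, narrows `_validate_preprocess_arguments` itself:
+    #
+    #     class MyImageProcessor(BaseImageProcessor):
+    #         _valid_processor_keys = ["images", "do_resize", "size", "resample", "return_tensors"]
+    #
+    #         def _validate_preprocess_arguments(self, do_resize=None, size=None, resample=None, **kwargs):
+    #             # This processor only needs the resizing arguments to be validated.
+    #             validate_preprocess_arguments(do_resize=do_resize, size=size, resample=resample)
+    #
+    #         def preprocess(self, images, do_resize=None, size=None, resample=None, **kwargs):
+    #             ...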

     def preprocess(self, images, **kwargs) -> BatchFeature:
         raise NotImplementedError("Each image processor must implement its own preprocess method")
diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py
index e4a55b3455a344..a662307cdf9eef 100644
--- a/src/transformers/image_utils.py
+++ b/src/transformers/image_utils.py
@@ -337,47 +337,6 @@ def load_image(image: Union[str, "PIL.Image.Image"], timeout: Optional[float] =
     return image


-def validate_preprocess_arguments(
-    do_rescale: Optional[bool] = None,
-    rescale_factor: Optional[float] = None,
-    do_normalize: Optional[bool] = None,
-    image_mean: Optional[Union[float, List[float]]] = None,
-    image_std: Optional[Union[float, List[float]]] = None,
-    do_pad: Optional[bool] = None,
-    size_divisibility: Optional[int] = None,
-    do_center_crop: Optional[bool] = None,
-    crop_size: Optional[Dict[str, int]] = None,
-    do_resize: Optional[bool] = None,
-    size: Optional[Dict[str, int]] = None,
-    resample: Optional["PILImageResampling"] = None,
-):
-    """
-    Checks validity of typically used arguments in an `ImageProcessor` `preprocess` method.
-    Raises `ValueError` if arguments incompatibility is caught.
-    Many incompatibilities are model-specific. `do_pad` sometimes needs `size_divisor`,
-    sometimes `size_divisibility`, and sometimes `size`. New models and processors added should follow
-    existing arguments when possible.
-
-    """
-    if do_rescale and rescale_factor is None:
-        raise ValueError("rescale_factor must be specified if do_rescale is True.")
-
-    if do_pad and size_divisibility is None:
-        # Here, size_divisor might be passed as the value of size
-        raise ValueError(
-            "Depending on moel, size_divisibility, size_divisor, pad_size or size must be specified if do_pad is True."
- ) - - if do_normalize and (image_mean is None or image_std is None): - raise ValueError("image_mean and image_std must both be specified if do_normalize is True.") - - if do_center_crop and crop_size is None: - raise ValueError("crop_size must be specified if do_center_crop is True.") - - if do_resize and (size is None or resample is None): - raise ValueError("size and resample must be specified if do_resize is True.") - - # In the future we can add a TF implementation here when we have TF models. class ImageFeatureExtractionMixin: """ @@ -759,11 +718,3 @@ def validate_annotations( "(batch of images) with the following keys: `image_id`, `file_name` and `segments_info`, with " "the latter being a list of annotations in the COCO format." ) - - -def validate_kwargs(valid_processor_keys: List[str], captured_kwargs: List[str]): - unused_keys = set(captured_kwargs).difference(set(valid_processor_keys)) - if unused_keys: - unused_key_str = ", ".join(unused_keys) - # TODO raise a warning here instead of simply logging? - logger.warning(f"Unused or unrecognized kwargs: {unused_key_str}.") diff --git a/src/transformers/models/beit/image_processing_beit.py b/src/transformers/models/beit/image_processing_beit.py index 5e15fe645cf9d9..5e37a67feb3faf 100644 --- a/src/transformers/models/beit/image_processing_beit.py +++ b/src/transformers/models/beit/image_processing_beit.py @@ -28,12 +28,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging @@ -257,11 +253,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -418,37 +409,11 @@ def preprocess( image_std = image_std if image_std is not None else self.image_std do_reduce_labels = do_reduce_labels if do_reduce_labels is not None else self.do_reduce_labels - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - images = make_list_of_images(images) if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) - if segmentation_maps is not None and not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation_maps type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - images = [ self._preprocess_image( image=img, diff --git a/src/transformers/models/bit/image_processing_bit.py b/src/transformers/models/bit/image_processing_bit.py index c9d5c7a7594a49..e6b8c001347442 100644 --- a/src/transformers/models/bit/image_processing_bit.py +++ b/src/transformers/models/bit/image_processing_bit.py @@ -32,12 +32,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -274,29 +270,8 @@ def preprocess( image_std = image_std if image_std is not None else self.image_std do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # PIL RGBA images are converted to RGB if do_convert_rgb: images = [convert_to_rgb(image) for image in images] @@ -304,12 +279,6 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/blip/image_processing_blip.py b/src/transformers/models/blip/image_processing_blip.py index a65ccc2d9839b7..59031ade46fae2 100644 --- a/src/transformers/models/blip/image_processing_blip.py +++ b/src/transformers/models/blip/image_processing_blip.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -250,24 +246,6 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) # PIL RGBA images are converted to RGB if do_convert_rgb: images = [convert_to_rgb(image) for image in images] @@ -275,12 +253,6 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/bridgetower/image_processing_bridgetower.py b/src/transformers/models/bridgetower/image_processing_bridgetower.py index 8fc62ad3970fa0..05b39e3ab068fd 100644 --- a/src/transformers/models/bridgetower/image_processing_bridgetower.py +++ b/src/transformers/models/bridgetower/image_processing_bridgetower.py @@ -18,7 +18,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import PaddingMode, center_crop, pad, resize, to_channel_dimension_format from ...image_utils import ( OPENAI_CLIP_MEAN, @@ -29,11 +29,7 @@ get_image_size, infer_channel_dimension_format, is_batched, - is_scaled_image, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -224,6 +220,37 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, + do_rescale, + rescale_factor, + do_normalize, + image_mean, + image_std, + do_pad, + size_divisor, + do_center_crop, + crop_size, + do_resize, + size, + resample, + ): + # crop_size is used only if it is set, else size will be used. + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + size_divisibility=size_divisor, + do_center_crop=do_center_crop, + crop_size=crop_size, + do_resize=do_resize, + size=size, + resample=resample, + ) + # Copied from transformers.models.vilt.image_processing_vilt.ViltImageProcessor.resize def resize( self, @@ -484,40 +511,12 @@ def preprocess( size = size if size is not None else self.size size = get_size_dict(size, default_to_square=False) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if not is_batched(images): images = [images] - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - # Here, crop_size is used only if it is set, else size will be used. 
- validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=size_divisor, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if do_resize: images = [ self.resize( diff --git a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py index 60f40272bf9271..247dfe8af77017 100644 --- a/src/transformers/models/chinese_clip/image_processing_chinese_clip.py +++ b/src/transformers/models/chinese_clip/image_processing_chinese_clip.py @@ -32,12 +32,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -265,37 +261,12 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) if do_convert_rgb: images = [convert_to_rgb(image) for image in images] # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/clip/image_processing_clip.py b/src/transformers/models/clip/image_processing_clip.py index fd2f8b3d532bbb..0db9d69a8076b8 100644 --- a/src/transformers/models/clip/image_processing_clip.py +++ b/src/transformers/models/clip/image_processing_clip.py @@ -32,12 +32,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -277,40 +273,14 @@ def preprocess( image_std = image_std if image_std is not None else self.image_std do_convert_rgb = do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. 
Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - if do_convert_rgb: images = [convert_to_rgb(image) for image in images] # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py index e88bfc8fe230df..83343093690223 100644 --- a/src/transformers/models/conditional_detr/image_processing_conditional_detr.py +++ b/src/transformers/models/conditional_detr/image_processing_conditional_detr.py @@ -22,7 +22,7 @@ import numpy as np from ...feature_extraction_utils import BatchFeature -from ...image_processing_utils import BaseImageProcessor, get_size_dict +from ...image_processing_utils import BaseImageProcessor, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( PaddingMode, center_to_corners_format, @@ -44,13 +44,9 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, validate_annotations, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( TensorType, @@ -867,6 +863,22 @@ def __init__( "input_data_format", ] + # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor._validate_preprocess_arguments with Detr->ConditionalDetr + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample + ): + # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + @classmethod # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->ConditionalDetr def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): @@ -1315,25 +1327,6 @@ def preprocess( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. 
- validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - if annotations is not None and isinstance(annotations, dict): annotations = [annotations] @@ -1359,12 +1352,6 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/convnext/image_processing_convnext.py b/src/transformers/models/convnext/image_processing_convnext.py index 54060105f59eb2..3950f4ab2270db 100644 --- a/src/transformers/models/convnext/image_processing_convnext.py +++ b/src/transformers/models/convnext/image_processing_convnext.py @@ -32,12 +32,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -276,36 +272,11 @@ def preprocess( size = size if size is not None else self.size size = get_size_dict(size, default_to_square=False) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
            input_data_format = infer_channel_dimension_format(images[0])
diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
index 5525eeeb8c58d5..07d6148b8906c6 100644
--- a/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
+++ b/src/transformers/models/deformable_detr/image_processing_deformable_detr.py
@@ -22,7 +22,7 @@
 import numpy as np

 from ...feature_extraction_utils import BatchFeature
-from ...image_processing_utils import BaseImageProcessor, get_size_dict
+from ...image_processing_utils import BaseImageProcessor, get_size_dict, validate_preprocess_arguments
 from ...image_transforms import (
     PaddingMode,
     center_to_corners_format,
@@ -44,13 +44,9 @@
     PILImageResampling,
     get_image_size,
     infer_channel_dimension_format,
-    is_scaled_image,
     make_list_of_images,
     to_numpy_array,
-    valid_images,
     validate_annotations,
-    validate_kwargs,
-    validate_preprocess_arguments,
 )
 from ...utils import (
     TensorType,
@@ -865,6 +861,21 @@
             "input_data_format",
         ]

+    def _validate_preprocess_arguments(
+        self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample
+    ):
+        # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
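+
+    # An illustrative failure mode of the shared validator (hypothetical call, for
+    # documentation only): enabling a step without its required settings raises
+    # immediately:
+    #
+    #     validate_preprocess_arguments(do_resize=True, size=None, resample=None)
+    #     # -> ValueError: size and resample must be specified if do_resize is True.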
+
     @classmethod
     # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->DeformableDetr
     def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
@@ -1313,25 +1324,6 @@
         images = make_list_of_images(images)

-        if not valid_images(images):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
-
-        # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
-        validate_preprocess_arguments(
-            do_rescale=do_rescale,
-            rescale_factor=rescale_factor,
-            do_normalize=do_normalize,
-            image_mean=image_mean,
-            image_std=image_std,
-            do_resize=do_resize,
-            size=size,
-            resample=resample,
-        )
-
         if annotations is not None and isinstance(annotations, dict):
             annotations = [annotations]

@@ -1357,12 +1349,6 @@
         # All transformations expect numpy arrays
         images = [to_numpy_array(image) for image in images]

-        if is_scaled_image(images[0]) and do_rescale:
-            logger.warning_once(
-                "It looks like you are trying to rescale already rescaled images. If the input"
-                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
-            )
-
         if input_data_format is None:
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])
diff --git a/src/transformers/models/deit/image_processing_deit.py b/src/transformers/models/deit/image_processing_deit.py
index 2a8ebb36377854..0b942a88ca23b7 100644
--- a/src/transformers/models/deit/image_processing_deit.py
+++ b/src/transformers/models/deit/image_processing_deit.py
@@ -27,12 +27,8 @@
     ImageInput,
     PILImageResampling,
     infer_channel_dimension_format,
-    is_scaled_image,
     make_list_of_images,
     to_numpy_array,
-    valid_images,
-    validate_kwargs,
-    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -257,34 +253,9 @@
         images = make_list_of_images(images)

-        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
-
-        if not valid_images(images):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-        validate_preprocess_arguments(
-            do_rescale=do_rescale,
-            rescale_factor=rescale_factor,
-            do_normalize=do_normalize,
-            image_mean=image_mean,
-            image_std=image_std,
-            do_center_crop=do_center_crop,
-            crop_size=crop_size,
-            do_resize=do_resize,
-            size=size,
-            resample=resample,
-        )

         # All transformations expect numpy arrays.
         images = [to_numpy_array(image) for image in images]

-        if is_scaled_image(images[0]) and do_rescale:
-            logger.warning_once(
-                "It looks like you are trying to rescale already rescaled images. If the input"
-                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
-            )
-
         if input_data_format is None:
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])
diff --git a/src/transformers/models/deta/image_processing_deta.py b/src/transformers/models/deta/image_processing_deta.py
index 45c5c6cb285a8f..0f13bc953c4204 100644
--- a/src/transformers/models/deta/image_processing_deta.py
+++ b/src/transformers/models/deta/image_processing_deta.py
@@ -20,7 +20,7 @@
 import numpy as np

 from ...feature_extraction_utils import BatchFeature
-from ...image_processing_utils import BaseImageProcessor, get_size_dict
+from ...image_processing_utils import BaseImageProcessor, get_size_dict, validate_preprocess_arguments
 from ...image_transforms import (
     PaddingMode,
     center_to_corners_format,
@@ -42,11 +42,9 @@
     get_image_size,
     infer_channel_dimension_format,
     is_batched,
-    is_scaled_image,
     to_numpy_array,
     valid_images,
     validate_annotations,
-    validate_preprocess_arguments,
 )
 from ...utils import (
     is_flax_available,
@@ -543,6 +541,21 @@
         self.image_std = image_std if image_std is not None else IMAGENET_DEFAULT_STD
         self.do_pad = do_pad

+    def _validate_preprocess_arguments(
+        self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample
+    ):
+        # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
+ validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.prepare_annotation with DETR->DETA def prepare_annotation( self, @@ -956,19 +969,6 @@ def preprocess( do_pad = self.do_pad if do_pad is None else do_pad format = self.format if format is None else format - # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - if not is_batched(images): images = [images] annotations = [annotations] if annotations is not None else None @@ -1000,12 +1000,6 @@ def preprocess( # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/detr/image_processing_detr.py b/src/transformers/models/detr/image_processing_detr.py index e0e59cbc7c40c6..ba8957f6cecdff 100644 --- a/src/transformers/models/detr/image_processing_detr.py +++ b/src/transformers/models/detr/image_processing_detr.py @@ -21,7 +21,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( PaddingMode, center_to_corners_format, @@ -43,13 +43,9 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, validate_annotations, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( TensorType, @@ -850,6 +846,21 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample + ): + # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + @classmethod def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): """ @@ -1285,25 +1296,6 @@ def preprocess( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated. 
-        validate_preprocess_arguments(
-            do_rescale=do_rescale,
-            rescale_factor=rescale_factor,
-            do_normalize=do_normalize,
-            image_mean=image_mean,
-            image_std=image_std,
-            do_resize=do_resize,
-            size=size,
-            resample=resample,
-        )
-
         if annotations is not None and isinstance(annotations, dict):
             annotations = [annotations]

@@ -1329,12 +1321,6 @@
         # All transformations expect numpy arrays
         images = [to_numpy_array(image) for image in images]

-        if is_scaled_image(images[0]) and do_rescale:
-            logger.warning_once(
-                "It looks like you are trying to rescale already rescaled images. If the input"
-                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
-            )
-
         if input_data_format is None:
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])
diff --git a/src/transformers/models/donut/image_processing_donut.py b/src/transformers/models/donut/image_processing_donut.py
index 1c6e4723139046..f4515001567b96 100644
--- a/src/transformers/models/donut/image_processing_donut.py
+++ b/src/transformers/models/donut/image_processing_donut.py
@@ -18,7 +18,7 @@
 import numpy as np

-from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
+from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments
 from ...image_transforms import (
     get_resize_output_image_size,
     pad,
@@ -33,12 +33,8 @@
     PILImageResampling,
     get_image_size,
     infer_channel_dimension_format,
-    is_scaled_image,
     make_list_of_images,
     to_numpy_array,
-    valid_images,
-    validate_kwargs,
-    validate_preprocess_arguments,
 )
 from ...utils import TensorType, logging
 from ...utils.import_utils import is_vision_available
@@ -143,6 +139,23 @@
         "input_data_format",
     ]

+    def _validate_preprocess_arguments(
+        self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_pad, size, do_resize, resample
+    ):
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_pad=do_pad,
+            # There is no pad divisibility in this processor, but pad requires the size arg.
+            size_divisibility=size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
+
     def align_long_axis(
         self,
         image: np.ndarray,
@@ -407,35 +420,9 @@
         images = make_list_of_images(images)

-        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
-
-        if not valid_images(images):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-        validate_preprocess_arguments(
-            do_rescale=do_rescale,
-            rescale_factor=rescale_factor,
-            do_normalize=do_normalize,
-            image_mean=image_mean,
-            image_std=image_std,
-            do_pad=do_pad,
-            size_divisibility=size,  # There is no pad divisibility in this processor, but pad requires the size arg.
-            do_resize=do_resize,
-            size=size,
-            resample=resample,
-        )
-
         # All transformations expect numpy arrays.
         images = [to_numpy_array(image) for image in images]

-        if is_scaled_image(images[0]) and do_rescale:
-            logger.warning_once(
-                "It looks like you are trying to rescale already rescaled images. If the input"
-                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
-            )
-
         if input_data_format is None:
             # We assume that all images have the same channel dimension format.
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/dpt/image_processing_dpt.py b/src/transformers/models/dpt/image_processing_dpt.py index 96f43a796e3886..79cc174cf19b18 100644 --- a/src/transformers/models/dpt/image_processing_dpt.py +++ b/src/transformers/models/dpt/image_processing_dpt.py @@ -29,14 +29,10 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, is_torch_available, is_torch_tensor, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -369,34 +365,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=size_divisor, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/efficientformer/image_processing_efficientformer.py b/src/transformers/models/efficientformer/image_processing_efficientformer.py index 38756f7c958f5d..9718acfccae2d6 100644 --- a/src/transformers/models/efficientformer/image_processing_efficientformer.py +++ b/src/transformers/models/efficientformer/image_processing_efficientformer.py @@ -32,11 +32,7 @@ PILImageResampling, infer_channel_dimension_format, is_batched, - is_scaled_image, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -255,37 +251,12 @@ def preprocess( size = size if size is not None else self.size size_dict = get_size_dict(size) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if not is_batched(images): images = [images] - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
- ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/efficientnet/image_processing_efficientnet.py b/src/transformers/models/efficientnet/image_processing_efficientnet.py index 4fd2364a3020c5..7292a931e8cc46 100644 --- a/src/transformers/models/efficientnet/image_processing_efficientnet.py +++ b/src/transformers/models/efficientnet/image_processing_efficientnet.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -316,34 +312,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/flava/image_processing_flava.py b/src/transformers/models/flava/image_processing_flava.py index d6a7c8080bb6b4..9362d8e5c7f1c6 100644 --- a/src/transformers/models/flava/image_processing_flava.py +++ b/src/transformers/models/flava/image_processing_flava.py @@ -30,12 +30,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -440,29 +436,9 @@ def _preprocess_image( input_data_format: Optional[ChannelDimension] = None, ) -> np.ndarray: """Preprocesses a single image.""" - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(image) @@ -672,14 +648,6 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - processed_images = [ self._preprocess_image( image=img, diff --git a/src/transformers/models/fuyu/image_processing_fuyu.py b/src/transformers/models/fuyu/image_processing_fuyu.py index ec5e1a36abb75c..c0b673f7460a34 100644 --- a/src/transformers/models/fuyu/image_processing_fuyu.py +++ b/src/transformers/models/fuyu/image_processing_fuyu.py @@ -19,7 +19,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature +from ...image_processing_utils import BaseImageProcessor, BatchFeature, validate_preprocess_arguments from ...image_transforms import ( pad, resize, @@ -35,7 +35,7 @@ is_valid_image, make_list_of_images, to_numpy_array, - validate_preprocess_arguments, + valid_images, ) from ...utils import ( TensorType, @@ -280,6 +280,38 @@ def __init__( "input_data_format", ] + def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False): + if isinstance(images, list) and any(isinstance(elem, list) and len(elem) >= 2 for elem in images): + raise ValueError("Multiple images for a single sample are not yet supported.") + + if not valid_images(images[0]): + raise ValueError( + "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray." + ) + + if is_scaled_image(to_numpy_array(images[0][0])) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." + ) + + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_pad, size, do_resize, resample + ): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + # There is no pad divisibility in this processor, but pad requires the size arg. + size_divisibility=size, + do_resize=do_resize, + size=size, + resample=resample, + ) + def resize( self, image: np.ndarray, @@ -460,32 +492,11 @@ def preprocess( rescale_factor = rescale_factor if rescale_factor is not None else self.rescale_factor patch_size = patch_size if patch_size is not None else self.patch_size - if isinstance(images, list) and any(isinstance(elem, list) and len(elem) >= 2 for elem in images): - raise ValueError("Multiple images for a single sample are not yet supported.") - batch_images = make_list_of_list_of_images(images) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=size, # There is no pad divisibility in this processor, but pad requires the size arg. - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. batch_images = [[to_numpy_array(image) for image in images] for images in batch_images] - if is_scaled_image(batch_images[0][0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. 
If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(batch_images[0][0]) diff --git a/src/transformers/models/glpn/image_processing_glpn.py b/src/transformers/models/glpn/image_processing_glpn.py index 7577b4eeb3d0c2..26d086f2c6a873 100644 --- a/src/transformers/models/glpn/image_processing_glpn.py +++ b/src/transformers/models/glpn/image_processing_glpn.py @@ -19,19 +19,15 @@ import numpy as np import PIL.Image -from ...image_processing_utils import BaseImageProcessor, BatchFeature +from ...image_processing_utils import BaseImageProcessor, BatchFeature, validate_preprocess_arguments from ...image_transforms import resize, to_channel_dimension_format from ...image_utils import ( ChannelDimension, PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -83,6 +79,15 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments(self, do_resize, size_divisor, resample): + # rescale() uses a constant rescale_factor. It does not need to be validated + # with a rescale_factor. + validate_preprocess_arguments( + do_resize=do_resize, + size=size_divisor, # size_divisor is used as a parameter for optimal resizing instead of size. + resample=resample, + ) + def resize( self, image: np.ndarray, @@ -187,31 +192,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - # Here, the rescale() method uses a constant rescale_factor. It does not need to be validated - # with a rescale_factor. - validate_preprocess_arguments( - do_resize=do_resize, - size=size_divisor, # Here, size_divisor is used as a parameter for optimal resizing instead of size. - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(img) for img in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
            input_data_format = infer_channel_dimension_format(images[0])
diff --git a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py
index 8b39d6801ca000..908448ab104a00 100644
--- a/src/transformers/models/grounding_dino/image_processing_grounding_dino.py
+++ b/src/transformers/models/grounding_dino/image_processing_grounding_dino.py
@@ -22,7 +22,7 @@
 import numpy as np

 from ...feature_extraction_utils import BatchFeature
-from ...image_processing_utils import BaseImageProcessor, get_size_dict
+from ...image_processing_utils import BaseImageProcessor, get_size_dict, validate_preprocess_arguments
 from ...image_transforms import (
     PaddingMode,
     center_to_corners_format,
@@ -42,13 +42,9 @@
     PILImageResampling,
     get_image_size,
     infer_channel_dimension_format,
-    is_scaled_image,
     make_list_of_images,
     to_numpy_array,
-    valid_images,
     validate_annotations,
-    validate_kwargs,
-    validate_preprocess_arguments,
 )
 from ...utils import (
     ExplicitEnum,
@@ -872,6 +868,21 @@
             "input_data_format",
         ]

+    def _validate_preprocess_arguments(
+        self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample
+    ):
+        # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
+
     @classmethod
     # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->GroundingDino
     def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
@@ -1320,25 +1331,6 @@
         images = make_list_of_images(images)

-        if not valid_images(images):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
-
-        # Here, the pad() method pads to the maximum of (width, height). It does not need to be validated.
-        validate_preprocess_arguments(
-            do_rescale=do_rescale,
-            rescale_factor=rescale_factor,
-            do_normalize=do_normalize,
-            image_mean=image_mean,
-            image_std=image_std,
-            do_resize=do_resize,
-            size=size,
-            resample=resample,
-        )
-
         if annotations is not None and isinstance(annotations, dict):
             annotations = [annotations]

@@ -1364,12 +1356,6 @@
         # All transformations expect numpy arrays
         images = [to_numpy_array(image) for image in images]

-        if is_scaled_image(images[0]) and do_rescale:
-            logger.warning_once(
-                "It looks like you are trying to rescale already rescaled images. If the input"
-                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
-            )
-
         if input_data_format is None:
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])
diff --git a/src/transformers/models/idefics2/image_processing_idefics2.py b/src/transformers/models/idefics2/image_processing_idefics2.py
index ac9df68871eee2..3a530af00b4f1c 100644
--- a/src/transformers/models/idefics2/image_processing_idefics2.py
+++ b/src/transformers/models/idefics2/image_processing_idefics2.py
@@ -32,7 +32,6 @@
     is_valid_image,
     to_numpy_array,
     valid_images,
-    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -246,6 +245,19 @@
         self.do_pad = do_pad
         self.do_image_splitting = do_image_splitting

+    def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False):
+        if not valid_images(images[0]):
+            raise ValueError(
+                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
+                "torch.Tensor, tf.Tensor or jax.ndarray."
+            )
+
+        if is_scaled_image(to_numpy_array(images[0][0])) and do_rescale:
+            logger.warning_once(
+                "It looks like you are trying to rescale already rescaled images. If the input"
+                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
+            )
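+
+    # Note (documentation only): unlike the base class, this override receives a list
+    # of lists of images (one inner list per sample), hence the double indexing
+    # `images[0][0]` above. `segmentation_maps` is unused here and kept only so the
+    # signature matches `BaseImageProcessor._validate_image_inputs`.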
+
     def resize(
         self,
         image: np.ndarray,
@@ -505,35 +517,12 @@
         images_list = make_list_of_images(images)

-        if not valid_images(images_list[0]):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-
-        validate_preprocess_arguments(
-            do_rescale=do_rescale,
-            rescale_factor=rescale_factor,
-            do_normalize=do_normalize,
-            image_mean=image_mean,
-            image_std=image_std,
-            do_resize=do_resize,
-            size=size,
-            resample=resample,
-        )
-
         if do_convert_rgb:
             images_list = [[convert_to_rgb(image) for image in images] for images in images_list]

         # All transformations expect numpy arrays.
         images_list = [[to_numpy_array(image) for image in images] for images in images_list]

-        if is_scaled_image(images_list[0][0]) and do_rescale:
-            logger.warning_once(
-                "It looks like you are trying to rescale already rescaled images. If the input"
-                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
-            )
-
         if input_data_format is None:
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images_list[0][0])
diff --git a/src/transformers/models/imagegpt/image_processing_imagegpt.py b/src/transformers/models/imagegpt/image_processing_imagegpt.py
index fecdd061d4e40e..39c828c3549a0b 100644
--- a/src/transformers/models/imagegpt/image_processing_imagegpt.py
+++ b/src/transformers/models/imagegpt/image_processing_imagegpt.py
@@ -18,19 +18,15 @@
 import numpy as np

-from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
+from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments
 from ...image_transforms import rescale, resize, to_channel_dimension_format
 from ...image_utils import (
     ChannelDimension,
     ImageInput,
     PILImageResampling,
     infer_channel_dimension_format,
-    is_scaled_image,
     make_list_of_images,
     to_numpy_array,
-    valid_images,
-    validate_kwargs,
-    validate_preprocess_arguments,
 )
 from ...utils import TensorType, is_vision_available, logging
@@ -116,6 +112,11 @@
         "input_data_format",
     ]

+    def _validate_preprocess_arguments(self, do_resize, size, resample):
+        # normalize() uses a constant factor to divide pixel values.
+        # Hence, the method does not need image_mean and image_std.
+        validate_preprocess_arguments(do_resize=do_resize, size=size, resample=resample)
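+
+    # Checks that the shared validator does not cover stay in `preprocess`; for this
+    # processor, the `do_color_quantize`/`clusters` pair is validated there. An
+    # illustrative call (hypothetical values, documentation only):
+    #
+    #     ImageGPTImageProcessor().preprocess(images, do_color_quantize=True, clusters=None)
+    #     # -> ValueError: Clusters must be specified if do_color_quantize is True.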
+ # hence, the method does not need image_mean and image_std. + validate_preprocess_arguments(do_resize=do_resize, size=size, resample=resample) + # Copied from transformers.models.vit.image_processing_vit.ViTImageProcessor.resize def resize( self, @@ -251,34 +252,12 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - # Here, normalize() is using a constant factor to divide pixel values. - # hence, the method does not need iamge_mean and image_std. - validate_preprocess_arguments( - do_resize=do_resize, - size=size, - resample=resample, - ) - if do_color_quantize and clusters is None: raise ValueError("Clusters must be specified if do_color_quantize is True.") # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_normalize: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If you wish to do this, " - "make sure to set `do_normalize` to `False` and that pixel values are between [-1, 1].", - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py index e2369911941388..141e0050f42271 100644 --- a/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py @@ -27,9 +27,6 @@ infer_channel_dimension_format, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_pytesseract_available, is_vision_available, logging, requires_backends @@ -257,19 +254,6 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays.
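The ImageGPT override above drops `image_mean`/`image_std` because its `normalize()` divides by a constant. A rough sketch of constant-factor normalization to [-1, 1], assuming the usual uint8-to-unit-range mapping; the exact factor is illustrative, not quoted from the library:

```python
import numpy as np


def normalize_to_unit_range(image: np.ndarray) -> np.ndarray:
    # Constant-factor normalization in the spirit of the ImageGPT comment
    # above: map uint8 pixels to [-1, 1] with no image_mean / image_std.
    return image.astype(np.float32) / 127.5 - 1.0


pixels = np.array([0, 128, 255], dtype=np.uint8)
print(normalize_to_unit_range(pixels))  # [-1.  0.00392157  1.]
```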
images = [to_numpy_array(image) for image in images] diff --git a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py index 8c5356993f16be..8fb77678a1da4b 100644 --- a/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py +++ b/src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_pytesseract_available, is_vision_available, logging, requires_backends @@ -316,33 +312,9 @@ def preprocess( tesseract_config = tesseract_config if tesseract_config is not None else self.tesseract_config images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/levit/image_processing_levit.py b/src/transformers/models/levit/image_processing_levit.py index b861a4ebf8b2dc..dead9ed24d8305 100644 --- a/src/transformers/models/levit/image_processing_levit.py +++ b/src/transformers/models/levit/image_processing_levit.py @@ -31,12 +31,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -271,34 +267,9 @@ def preprocess( crop_size = get_size_dict(crop_size, param_name="crop_size") images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
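The "already rescaled" warning deleted from these `preprocess` bodies now lives in the base class's `_validate_image_inputs` and hinges on `is_scaled_image`. A sketch of what that heuristic plausibly checks (float values already inside [0, 1]); treat it as an approximation, not the library source:

```python
import numpy as np


def is_scaled_image_sketch(image: np.ndarray) -> bool:
    # Approximation of the heuristic behind the warning: a float image whose
    # values already lie in [0, 1] has most likely been rescaled once.
    if image.dtype == np.uint8:
        return False
    return float(image.min()) >= 0 and float(image.max()) <= 1


print(is_scaled_image_sketch(np.random.rand(224, 224, 3)))       # True -> would warn
print(is_scaled_image_sketch(np.zeros((2, 2), dtype=np.uint8)))  # False
```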
- ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/llava_next/image_processing_llava_next.py b/src/transformers/models/llava_next/image_processing_llava_next.py index 3934927a2e7957..89a840e816e26c 100644 --- a/src/transformers/models/llava_next/image_processing_llava_next.py +++ b/src/transformers/models/llava_next/image_processing_llava_next.py @@ -35,11 +35,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -534,37 +531,12 @@ def preprocess( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - if do_convert_rgb: images = [convert_to_rgb(image) for image in images] # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py index 5440584d25f28f..7643e7c3522c1f 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former.py +++ b/src/transformers/models/mask2former/image_processing_mask2former.py @@ -36,11 +36,7 @@ get_image_size, infer_channel_dimension_format, is_batched, - is_scaled_image, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( IMAGENET_DEFAULT_MEAN, @@ -627,11 +623,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -728,31 +719,6 @@ def preprocess( ignore_index = ignore_index if ignore_index is not None else self.ignore_index reduce_labels = reduce_labels if reduce_labels is not None else self.reduce_labels - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - - if segmentation_maps is not None and not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - if not is_batched(images): images = [images] segmentation_maps = [segmentation_maps] if segmentation_maps is not None else None diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index 3c854b35c76edb..5ae92b36fefd31 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -35,12 +35,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( IMAGENET_DEFAULT_MEAN, @@ -634,11 +630,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -745,30 +736,6 @@ def preprocess( ignore_index = ignore_index if ignore_index is not None else self.ignore_index do_reduce_labels = do_reduce_labels if do_reduce_labels is not None else self.do_reduce_labels - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - - if segmentation_maps is not None and not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - images = make_list_of_images(images) if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) diff --git a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py index 086ab892492065..bf69c306f91c47 100644 --- a/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py +++ b/src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py @@ -31,12 +31,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -262,35 +258,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py index 44b784d2a7c3b8..17971fde6ecff2 100644 --- a/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py +++ b/src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py @@ -31,12 +31,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, is_torch_tensor, logging @@ -266,34 +262,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
- ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/mobilevit/image_processing_mobilevit.py b/src/transformers/models/mobilevit/image_processing_mobilevit.py index 8cc79a283e05af..13d8297bb3cb8a 100644 --- a/src/transformers/models/mobilevit/image_processing_mobilevit.py +++ b/src/transformers/models/mobilevit/image_processing_mobilevit.py @@ -25,12 +25,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging @@ -243,11 +239,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) @@ -383,35 +374,11 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - if segmentation_maps is not None and not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - images = [ self._preprocess_image( image=img, diff --git a/src/transformers/models/nougat/image_processing_nougat.py b/src/transformers/models/nougat/image_processing_nougat.py index 49913d5baa080b..6a456b5f5b942f 100644 --- a/src/transformers/models/nougat/image_processing_nougat.py +++ b/src/transformers/models/nougat/image_processing_nougat.py @@ -18,7 +18,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( get_resize_output_image_size, pad, @@ -34,12 +34,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging from ...utils.import_utils import is_cv2_available, is_vision_available @@ -145,6 +141,33 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, + do_rescale, + rescale_factor, + do_normalize, + image_mean, + image_std, + do_pad, + size_divisibility, + do_resize, + size, + resample, + ): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + # There is no pad divisibility in this processor, but pad requires the size arg. + size_divisibility=size, + do_resize=do_resize, + size=size, + resample=resample, + ) + def python_find_non_zero(self, image: np.array): """This is a reimplementation of a findNonZero function equivalent to cv2.""" non_zero_indices = np.column_stack(np.nonzero(image)) @@ -461,35 +484,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=size, # There is no pad divisibility in this processor, but pad requires the size arg. - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
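In the Nougat override above, `size` is passed as `size_divisibility` only to satisfy the generic do_pad None-check, because its `pad()` takes a fixed target size rather than a divisor. A hedged sketch of that pad-to-fixed-size behaviour; the helper name and the centering choice are illustrative:

```python
import numpy as np


def pad_to_size(image: np.ndarray, height: int, width: int) -> np.ndarray:
    # Illustrative pad-to-fixed-size: the behaviour for which Nougat's pad()
    # needs `size`, and why the override feeds `size` into the generic
    # `size_divisibility` slot purely to satisfy the do_pad None-check.
    h, w = image.shape[:2]
    pad_h, pad_w = max(height - h, 0), max(width - w, 0)
    top, left = pad_h // 2, pad_w // 2
    return np.pad(image, ((top, pad_h - top), (left, pad_w - left)))


print(pad_to_size(np.ones((3, 5)), 8, 8).shape)  # (8, 8)
```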
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/oneformer/image_processing_oneformer.py b/src/transformers/models/oneformer/image_processing_oneformer.py index 9f865f8efd9b94..0a285af71822c9 100644 --- a/src/transformers/models/oneformer/image_processing_oneformer.py +++ b/src/transformers/models/oneformer/image_processing_oneformer.py @@ -38,12 +38,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( IMAGENET_DEFAULT_MEAN, @@ -616,11 +612,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -729,31 +720,6 @@ def preprocess( ignore_index = ignore_index if ignore_index is not None else self.ignore_index do_reduce_labels = do_reduce_labels if do_reduce_labels is not None else self.do_reduce_labels - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - - if segmentation_maps is not None and not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - images = make_list_of_images(images) if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) diff --git a/src/transformers/models/owlv2/image_processing_owlv2.py b/src/transformers/models/owlv2/image_processing_owlv2.py index 06ac984c7d866e..e370475d25e93e 100644 --- a/src/transformers/models/owlv2/image_processing_owlv2.py +++ b/src/transformers/models/owlv2/image_processing_owlv2.py @@ -19,7 +19,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature +from ...image_processing_utils import BaseImageProcessor, BatchFeature, validate_preprocess_arguments from ...image_transforms import ( center_to_corners_format, pad, @@ -33,12 +33,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( TensorType, @@ -248,6 +244,20 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments(self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, size): + # Here, pad and resize methods are different from the rest of image processors + # as they don't have any resampling in resize() + # or pad size in pad() (the maximum of (height, width) is taken instead). + # hence, these arguments don't need to be passed in validate_preprocess_arguments. 
+ validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + size=size, + ) + def pad( self, image: np.array, @@ -416,35 +426,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - # Here, pad and resize methods are different from the rest of image processors - # as they don't have any resampling in resize() - # or pad size in pad() (the maximum of (height, width) is taken instead). - # hence, these arguments don't need to be passed in validate_preprocess_arguments. - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - size=size, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/owlvit/image_processing_owlvit.py b/src/transformers/models/owlvit/image_processing_owlvit.py index 25ea5f2720d527..b3a92a098cfd28 100644 --- a/src/transformers/models/owlvit/image_processing_owlvit.py +++ b/src/transformers/models/owlvit/image_processing_owlvit.py @@ -34,12 +34,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, logging @@ -368,35 +364,9 @@ def preprocess( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
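The OWLv2 override above validates only rescale/normalize (plus the presence of `size`), since its `resize()` offers no resampling choice and its `pad()` grows the image to a max(height, width) square. A sketch of that square padding; the grey fill value is an assumption for illustration:

```python
import numpy as np


def pad_to_square(image: np.ndarray, fill: float = 0.5) -> np.ndarray:
    # OWLv2-style padding as described above: grow the image to a
    # max(height, width) square, so pad() needs no size argument at all.
    h, w = image.shape[:2]
    side = max(h, w)
    return np.pad(image, ((0, side - h), (0, side - w)), constant_values=fill)


print(pad_to_square(np.zeros((4, 7))).shape)  # (7, 7)
```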
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/perceiver/image_processing_perceiver.py b/src/transformers/models/perceiver/image_processing_perceiver.py index 02dd527e437be7..62aa61442f888b 100644 --- a/src/transformers/models/perceiver/image_processing_perceiver.py +++ b/src/transformers/models/perceiver/image_processing_perceiver.py @@ -28,12 +28,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -303,35 +299,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/poolformer/image_processing_poolformer.py b/src/transformers/models/poolformer/image_processing_poolformer.py index dcdb1591b1c31b..daa23793005ef2 100644 --- a/src/transformers/models/poolformer/image_processing_poolformer.py +++ b/src/transformers/models/poolformer/image_processing_poolformer.py @@ -31,12 +31,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -311,35 +307,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/pvt/image_processing_pvt.py b/src/transformers/models/pvt/image_processing_pvt.py index f3907edf3af093..7807d63f23e206 100644 --- a/src/transformers/models/pvt/image_processing_pvt.py +++ b/src/transformers/models/pvt/image_processing_pvt.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -233,33 +229,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/sam/image_processing_sam.py b/src/transformers/models/sam/image_processing_sam.py index ccdc72fc7baadb..c67f306b42ff0a 100644 --- a/src/transformers/models/sam/image_processing_sam.py +++ b/src/transformers/models/sam/image_processing_sam.py @@ -20,7 +20,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import convert_to_rgb, pad, resize, to_channel_dimension_format from ...image_utils import ( IMAGENET_DEFAULT_MEAN, @@ -30,12 +30,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import ( TensorType, @@ -182,6 +178,33 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, + do_rescale, + rescale_factor, + do_normalize, + image_mean, + image_std, + do_pad, + size_divisibility, + do_resize, + size, + resample, + pad_size, + ): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + size_divisibility=pad_size, # _preprocess needs do_pad and pad_size. + do_resize=do_resize, + size=size, + resample=resample, + ) + def pad_image( self, image: np.ndarray, @@ -335,12 +358,6 @@ def _preprocess_image( # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. 
If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: input_data_format = infer_channel_dimension_format(image) @@ -512,35 +529,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) - if not valid_images(segmentation_maps): - raise ValueError( - "Invalid segmentation map type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=pad_size, # Here _preprocess needs do_pad and pad_size. - do_resize=do_resize, - size=size, - resample=resample, - ) - images, original_sizes, reshaped_input_sizes = zip( *( self._preprocess_image( diff --git a/src/transformers/models/segformer/image_processing_segformer.py b/src/transformers/models/segformer/image_processing_segformer.py index 1fef9645549878..acd9f99790e073 100644 --- a/src/transformers/models/segformer/image_processing_segformer.py +++ b/src/transformers/models/segformer/image_processing_segformer.py @@ -28,12 +28,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_torch_available, is_torch_tensor, is_vision_available, logging @@ -251,11 +247,6 @@ def _preprocess_image( """Preprocesses a single image.""" # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) if input_data_format is None: input_data_format = infer_channel_dimension_format(image) image = self._preprocess( @@ -398,27 +389,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if segmentation_maps is not None: segmentation_maps = make_list_of_images(segmentation_maps, expected_ndims=2) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." 
- ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - images = [ self._preprocess_image( image=img, diff --git a/src/transformers/models/seggpt/image_processing_seggpt.py b/src/transformers/models/seggpt/image_processing_seggpt.py index 80fb94cdc7aaf4..6456702b08ce14 100644 --- a/src/transformers/models/seggpt/image_processing_seggpt.py +++ b/src/transformers/models/seggpt/image_processing_seggpt.py @@ -28,10 +28,8 @@ PILImageResampling, get_channel_dimension_axis, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, ) from ...utils import TensorType, is_torch_available, logging, requires_backends @@ -350,30 +348,9 @@ def _preprocess_step( images = make_list_of_images(images) - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - if do_resize and size is None: - raise ValueError("Size must be specified if do_resize is True.") - - if do_rescale and rescale_factor is None: - raise ValueError("Rescale factor must be specified if do_rescale is True.") - - if do_normalize and (image_mean is None or image_std is None): - raise ValueError("Image mean and std must be specified if do_normalize is True.") - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None and not is_mask: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/siglip/image_processing_siglip.py b/src/transformers/models/siglip/image_processing_siglip.py index 5f24ffb0a2a8b1..6d2668a3442302 100644 --- a/src/transformers/models/siglip/image_processing_siglip.py +++ b/src/transformers/models/siglip/image_processing_siglip.py @@ -28,12 +28,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -189,32 +185,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
- ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/superpoint/image_processing_superpoint.py b/src/transformers/models/superpoint/image_processing_superpoint.py index fbbb717570cb70..5bddd2b8213385 100644 --- a/src/transformers/models/superpoint/image_processing_superpoint.py +++ b/src/transformers/models/superpoint/image_processing_superpoint.py @@ -24,7 +24,6 @@ ChannelDimension, ImageInput, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, valid_images, @@ -235,12 +234,6 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/swin2sr/image_processing_swin2sr.py b/src/transformers/models/swin2sr/image_processing_swin2sr.py index a126e6eee5e8d4..a1d169daa6c6b5 100644 --- a/src/transformers/models/swin2sr/image_processing_swin2sr.py +++ b/src/transformers/models/swin2sr/image_processing_swin2sr.py @@ -18,18 +18,14 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature +from ...image_processing_utils import BaseImageProcessor, BatchFeature, validate_preprocess_arguments from ...image_transforms import get_image_size, pad, to_channel_dimension_format from ...image_utils import ( ChannelDimension, ImageInput, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -77,6 +73,14 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments(self, do_rescale, rescale_factor, do_pad, size_divisibility, pad_size): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_pad=do_pad, + size_divisibility=pad_size, # pad function simply requires pad_size. + ) + def pad( self, image: np.ndarray, @@ -172,29 +176,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_pad=do_pad, - size_divisibility=pad_size, # Here the pad function simply requires pad_size. - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
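Swin2SR's override forwards `pad_size` through the generic `size_divisibility` slot because its `pad()` only needs the window size to pad up to a multiple. A sketch under that assumption; the padding mode here is a guess, not the library code:

```python
import numpy as np


def pad_to_multiple(image: np.ndarray, pad_size: int) -> np.ndarray:
    # Sketch of the window-size padding the Swin2SR override validates:
    # grow height and width to the next multiple of pad_size.
    h, w = image.shape[:2]
    pad_h = -h % pad_size
    pad_w = -w % pad_size
    return np.pad(image, ((0, pad_h), (0, pad_w)), mode="symmetric")


print(pad_to_multiple(np.zeros((30, 45)), 8).shape)  # (32, 48)
```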
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/tvlt/image_processing_tvlt.py b/src/transformers/models/tvlt/image_processing_tvlt.py index f13101c15a9615..8793dae7873e11 100644 --- a/src/transformers/models/tvlt/image_processing_tvlt.py +++ b/src/transformers/models/tvlt/image_processing_tvlt.py @@ -30,12 +30,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, is_valid_image, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -233,29 +229,9 @@ def _preprocess_image( input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: """Preprocesses a single image.""" - - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. image = to_numpy_array(image) - if is_scaled_image(image) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: input_data_format = infer_channel_dimension_format(image) @@ -377,14 +353,6 @@ def preprocess( patch_size = patch_size if patch_size is not None else self.patch_size num_frames = num_frames if patch_size is not None else self.num_frames - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(videos): - raise ValueError( - "Invalid image or video type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - videos = make_batched(videos) # Check number of frames is fewer than maximum frames diff --git a/src/transformers/models/tvp/image_processing_tvp.py b/src/transformers/models/tvp/image_processing_tvp.py index 18600ee5fbe7f3..51356df67d525a 100644 --- a/src/transformers/models/tvp/image_processing_tvp.py +++ b/src/transformers/models/tvp/image_processing_tvp.py @@ -18,7 +18,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import ( PaddingMode, flip_channel_order, @@ -33,11 +33,10 @@ ImageInput, PILImageResampling, get_image_size, + is_scaled_image, is_valid_image, to_numpy_array, valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -195,6 +194,49 @@ def __init__( "input_data_format", ] + def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False): + if not valid_images(images[0]): + raise ValueError( + "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " + "torch.Tensor, tf.Tensor or jax.ndarray." + ) + + if is_scaled_image(to_numpy_array(images[0][0])) and do_rescale: + logger.warning_once( + "It looks like you are trying to rescale already rescaled images. If the input" + " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." 
+ ) + + def _validate_preprocess_arguments( + self, + do_rescale, + rescale_factor, + do_normalize, + image_mean, + image_std, + do_pad, + pad_size, + do_center_crop, + crop_size, + do_resize, + size, + resample, + ): + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_pad=do_pad, + size_divisibility=pad_size, # pad() method simply requires the pad_size argument. + do_center_crop=do_center_crop, + crop_size=crop_size, + do_resize=do_resize, + size=size, + resample=resample, + ) + def resize( self, image: np.ndarray, @@ -309,21 +351,6 @@ def _preprocess_image( ) -> np.ndarray: """Preprocesses a single image.""" - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_pad=do_pad, - size_divisibility=pad_size, # here the pad() method simply requires the pad_size argument. - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. image = to_numpy_array(image) @@ -459,14 +486,6 @@ def preprocess( crop_size = crop_size if crop_size is not None else self.crop_size crop_size = get_size_dict(crop_size, param_name="crop_size") - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(videos): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - videos = make_batched(videos) videos = [ diff --git a/src/transformers/models/videomae/image_processing_videomae.py b/src/transformers/models/videomae/image_processing_videomae.py index 6563d69c6503ea..06ce8f2413507e 100644 --- a/src/transformers/models/videomae/image_processing_videomae.py +++ b/src/transformers/models/videomae/image_processing_videomae.py @@ -35,8 +35,6 @@ is_valid_image, to_numpy_array, valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -209,19 +207,6 @@ def _preprocess_image( input_data_format: Optional[Union[str, ChannelDimension]] = None, ) -> np.ndarray: """Preprocesses a single image.""" - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. image = to_numpy_array(image) @@ -328,8 +313,6 @@ def preprocess( crop_size = crop_size if crop_size is not None else self.crop_size crop_size = get_size_dict(crop_size, param_name="crop_size") - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - if not valid_images(videos): raise ValueError( "Invalid image type. 
Must be of type PIL.Image.Image, numpy.ndarray, " diff --git a/src/transformers/models/vilt/image_processing_vilt.py b/src/transformers/models/vilt/image_processing_vilt.py index 42e5b3f439d6aa..4715db0c4c0471 100644 --- a/src/transformers/models/vilt/image_processing_vilt.py +++ b/src/transformers/models/vilt/image_processing_vilt.py @@ -18,7 +18,7 @@ import numpy as np -from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict +from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict, validate_preprocess_arguments from ...image_transforms import PaddingMode, pad, resize, to_channel_dimension_format from ...image_utils import ( IMAGENET_STANDARD_MEAN, @@ -28,12 +28,8 @@ PILImageResampling, get_image_size, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -209,6 +205,22 @@ def __init__( "input_data_format", ] + def _validate_preprocess_arguments( + self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample + ): + # pad() method does not require any additional argument as it takes the maximum of (height, width). + # Hence, it does not need to be passed to a validate_preprocess_arguments() method. + validate_preprocess_arguments( + do_rescale=do_rescale, + rescale_factor=rescale_factor, + do_normalize=do_normalize, + image_mean=image_mean, + image_std=image_std, + do_resize=do_resize, + size=size, + resample=resample, + ) + @classmethod def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs): """ @@ -433,36 +445,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - - # Here the pad() method does not require any additional argument as it takes the maximum of (height, width). - # Hence, it does not need to be passed to a validate_preprocess_arguments() method. - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
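ViLT's override skips pad validation entirely because, as its comment notes, `pad()` takes the maximum (height, width) over the batch. A minimal sketch of that batch padding:

```python
from typing import List

import numpy as np


def pad_batch_to_max(images: List[np.ndarray]) -> np.ndarray:
    # ViLT-style batch padding as noted in the override above: every image is
    # padded to the largest (height, width) in the batch, so pad() carries no
    # size argument and needs no validation.
    max_h = max(img.shape[0] for img in images)
    max_w = max(img.shape[1] for img in images)
    padded = [
        np.pad(img, ((0, max_h - img.shape[0]), (0, max_w - img.shape[1])))
        for img in images
    ]
    return np.stack(padded)


print(pad_batch_to_max([np.ones((3, 4)), np.ones((5, 2))]).shape)  # (2, 5, 4)
```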
input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/vit/image_processing_vit.py b/src/transformers/models/vit/image_processing_vit.py index 4c7d8de714f72d..7767c9866b176e 100644 --- a/src/transformers/models/vit/image_processing_vit.py +++ b/src/transformers/models/vit/image_processing_vit.py @@ -27,12 +27,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, logging @@ -232,33 +228,9 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_resize=do_resize, - size=size, - resample=resample, - ) - # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. input_data_format = infer_channel_dimension_format(images[0]) diff --git a/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py b/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py index 4bb3f70b49bb5b..e3c50182a5deed 100644 --- a/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py +++ b/src/transformers/models/vit_hybrid/image_processing_vit_hybrid.py @@ -32,12 +32,8 @@ ImageInput, PILImageResampling, infer_channel_dimension_format, - is_scaled_image, make_list_of_images, to_numpy_array, - valid_images, - validate_kwargs, - validate_preprocess_arguments, ) from ...utils import TensorType, is_vision_available, logging @@ -276,26 +272,6 @@ def preprocess( images = make_list_of_images(images) - validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys) - - if not valid_images(images): - raise ValueError( - "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, " - "torch.Tensor, tf.Tensor or jax.ndarray." - ) - validate_preprocess_arguments( - do_rescale=do_rescale, - rescale_factor=rescale_factor, - do_normalize=do_normalize, - image_mean=image_mean, - image_std=image_std, - do_center_crop=do_center_crop, - crop_size=crop_size, - do_resize=do_resize, - size=size, - resample=resample, - ) - # PIL RGBA images are converted to RGB if do_convert_rgb: images = [convert_to_rgb(image) for image in images] @@ -303,12 +279,6 @@ def preprocess( # All transformations expect numpy arrays. images = [to_numpy_array(image) for image in images] - if is_scaled_image(images[0]) and do_rescale: - logger.warning_once( - "It looks like you are trying to rescale already rescaled images. If the input" - " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again." - ) - if input_data_format is None: # We assume that all images have the same channel dimension format. 
diff --git a/src/transformers/models/vitmatte/image_processing_vitmatte.py b/src/transformers/models/vitmatte/image_processing_vitmatte.py
index d7310bc0dd26ba..b945484fefb718 100644
--- a/src/transformers/models/vitmatte/image_processing_vitmatte.py
+++ b/src/transformers/models/vitmatte/image_processing_vitmatte.py
@@ -27,12 +27,9 @@
     ImageInput,
     get_image_size,
     infer_channel_dimension_format,
-    is_scaled_image,
     make_list_of_images,
     to_numpy_array,
     valid_images,
-    validate_kwargs,
-    validate_preprocess_arguments,
 )

 from ...utils import TensorType, logging
@@ -213,39 +210,16 @@ def preprocess(
         images = make_list_of_images(images)
         trimaps = make_list_of_images(trimaps, expected_ndims=2)

-        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
-
         if not valid_images(trimaps):
             raise ValueError(
                 "Invalid trimap type. Must be of type PIL.Image.Image, numpy.ndarray, "
                 "torch.Tensor, tf.Tensor or jax.ndarray."
             )

-        if not valid_images(images):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-        validate_preprocess_arguments(
-            do_rescale=do_rescale,
-            rescale_factor=rescale_factor,
-            do_normalize=do_normalize,
-            image_mean=image_mean,
-            image_std=image_std,
-            do_pad=do_pad,
-            size_divisibility=size_divisibility,
-        )
-
         # All transformations expect numpy arrays.
         images = [to_numpy_array(image) for image in images]
         trimaps = [to_numpy_array(trimap) for trimap in trimaps]

-        if is_scaled_image(images[0]) and do_rescale:
-            logger.warning_once(
-                "It looks like you are trying to rescale already rescaled images. If the input"
-                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
-            )
-
         if input_data_format is None:
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])
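ViTMatte keeps its trimap check inline because the base `_validate_image_inputs` only knows about images and segmentation maps; its argument validation, however, now flows through the shared helper, including the `do_pad`/`size_divisibility` pairing that the other models in this patch skip. A quick sketch of that pairing, with the validator called directly for illustration:

```python
from transformers.image_processing_utils import validate_preprocess_arguments

# Padding to a multiple requires the divisor; omitting it trips the check.
try:
    validate_preprocess_arguments(do_pad=True, size_divisibility=None)
except ValueError as err:
    print(err)

# Forwarding the pair, as ViTMatte's override does, passes.
validate_preprocess_arguments(do_pad=True, size_divisibility=32)
```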
diff --git a/src/transformers/models/vivit/image_processing_vivit.py b/src/transformers/models/vivit/image_processing_vivit.py
index 9b62aedc234e88..cd019d2e5a5401 100644
--- a/src/transformers/models/vivit/image_processing_vivit.py
+++ b/src/transformers/models/vivit/image_processing_vivit.py
@@ -38,8 +38,6 @@
     is_valid_image,
     to_numpy_array,
     valid_images,
-    validate_kwargs,
-    validate_preprocess_arguments,
 )

 from ...utils import logging
@@ -156,6 +154,47 @@ def __init__(
             "input_data_format",
         ]

+    def _validate_image_inputs(self, images, segmentation_maps=None, do_rescale=False):
+        if not valid_images(images[0]):
+            raise ValueError(
+                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray."
+            )
+
+        if is_scaled_image(to_numpy_array(images[0][0])) and do_rescale:
+            logger.warning_once(
+                "It looks like you are trying to rescale already rescaled images. If the input"
+                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
+            )
+
+    def _validate_preprocess_arguments(
+        self,
+        do_rescale,
+        rescale_factor,
+        do_normalize,
+        image_mean,
+        image_std,
+        do_center_crop,
+        crop_size,
+        do_resize,
+        size,
+        resample,
+        offset,
+    ):
+        super()._validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_center_crop=do_center_crop,
+            crop_size=crop_size,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
+        if offset and not do_rescale:
+            raise ValueError("For offset, do_rescale must also be set to True.")
+
     def resize(
         self,
         image: np.ndarray,
@@ -260,31 +299,9 @@ def _preprocess_image(
     ) -> np.ndarray:
         """Preprocesses a single image."""

-        validate_preprocess_arguments(
-            do_rescale=do_rescale,
-            rescale_factor=rescale_factor,
-            do_normalize=do_normalize,
-            image_mean=image_mean,
-            image_std=image_std,
-            do_center_crop=do_center_crop,
-            crop_size=crop_size,
-            do_resize=do_resize,
-            size=size,
-            resample=resample,
-        )
-
-        if offset and not do_rescale:
-            raise ValueError("For offset, do_rescale must also be set to True.")
-
         # All transformations expect numpy arrays.
         image = to_numpy_array(image)

-        if is_scaled_image(image) and do_rescale:
-            logger.warning_once(
-                "It looks like you are trying to rescale already rescaled images. If the input"
-                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
-            )
-
         if input_data_format is None:
             input_data_format = infer_channel_dimension_format(image)
@@ -386,14 +403,6 @@ def preprocess(
         crop_size = crop_size if crop_size is not None else self.crop_size
         crop_size = get_size_dict(crop_size, param_name="crop_size")

-        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)
-
-        if not valid_images(videos):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-
         videos = make_batched(videos)

         videos = [
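Vivit is the one model here that overrides both hooks: its inputs are videos (lists of frames), so the type check inspects `images[0]`, and the `offset` flag adds a model-specific constraint on top of the shared checks via `super()`. The extend-don't-replace pattern, reduced to a hypothetical minimal class:

```python
from transformers.image_processing_utils import BaseImageProcessor

class ToyVideoProcessor(BaseImageProcessor):
    _valid_processor_keys = ["do_rescale", "rescale_factor", "offset"]

    def _validate_preprocess_arguments(self, offset=False, **kwargs):
        # Run the shared checks first, then layer the model-specific rule on top,
        # mirroring the Vivit override above.
        super()._validate_preprocess_arguments(**kwargs)
        if offset and not kwargs.get("do_rescale", False):
            raise ValueError("For offset, do_rescale must also be set to True.")
```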
diff --git a/src/transformers/models/yolos/image_processing_yolos.py b/src/transformers/models/yolos/image_processing_yolos.py
index b74819c7a1c91b..04d430298b72e8 100644
--- a/src/transformers/models/yolos/image_processing_yolos.py
+++ b/src/transformers/models/yolos/image_processing_yolos.py
@@ -20,7 +20,7 @@
 import numpy as np

 from ...feature_extraction_utils import BatchFeature
-from ...image_processing_utils import BaseImageProcessor, get_size_dict
+from ...image_processing_utils import BaseImageProcessor, get_size_dict, validate_preprocess_arguments
 from ...image_transforms import (
     PaddingMode,
     center_to_corners_format,
@@ -42,13 +42,9 @@
     PILImageResampling,
     get_image_size,
     infer_channel_dimension_format,
-    is_scaled_image,
     make_list_of_images,
     to_numpy_array,
-    valid_images,
     validate_annotations,
-    validate_kwargs,
-    validate_preprocess_arguments,
 )
 from ...utils import (
     TensorType,
@@ -772,6 +768,21 @@ def __init__(
             "input_data_format",
         ]

+    def _validate_preprocess_arguments(
+        self, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_resize, size, resample
+    ):
+        # Here the pad() method pads using the max of (width, height), so do_pad does not need to be validated.
+        validate_preprocess_arguments(
+            do_rescale=do_rescale,
+            rescale_factor=rescale_factor,
+            do_normalize=do_normalize,
+            image_mean=image_mean,
+            image_std=image_std,
+            do_resize=do_resize,
+            size=size,
+            resample=resample,
+        )
+
     @classmethod
     # Copied from transformers.models.detr.image_processing_detr.DetrImageProcessor.from_dict with Detr->Yolos
     def from_dict(cls, image_processor_dict: Dict[str, Any], **kwargs):
@@ -1213,27 +1224,9 @@ def preprocess(
         )
         do_pad = self.do_pad if do_pad is None else do_pad
         format = self.format if format is None else format
-        validate_kwargs(captured_kwargs=kwargs.keys(), valid_processor_keys=self._valid_processor_keys)

         images = make_list_of_images(images)

-        if not valid_images(images):
-            raise ValueError(
-                "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
-                "torch.Tensor, tf.Tensor or jax.ndarray."
-            )
-        # Here the pad() method pads using the max of (width, height) and does not need to be validated.
-        validate_preprocess_arguments(
-            do_rescale=do_rescale,
-            rescale_factor=rescale_factor,
-            do_normalize=do_normalize,
-            image_mean=image_mean,
-            image_std=image_std,
-            do_resize=do_resize,
-            size=size,
-            resample=resample,
-        )
-
         if annotations is not None and isinstance(annotations, dict):
             annotations = [annotations]
@@ -1259,12 +1252,6 @@ def preprocess(
         # All transformations expect numpy arrays
         images = [to_numpy_array(image) for image in images]

-        if is_scaled_image(images[0]) and do_rescale:
-            logger.warning_once(
-                "It looks like you are trying to rescale already rescaled images. If the input"
-                " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
-            )
-
         if input_data_format is None:
             # We assume that all images have the same channel dimension format.
             input_data_format = infer_channel_dimension_format(images[0])
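Like ViLT, YOLOS pads to the max of (width, height), so its override forwards everything except `do_pad`. The net effect across all six files is that each incompatibility is now reported from a single code path; a last sanity sketch of the normalize branch that every one of these processors shares, again calling the shared validator directly:

```python
from transformers.image_processing_utils import validate_preprocess_arguments

# Both statistics supplied: passes.
validate_preprocess_arguments(
    do_normalize=True,
    image_mean=[0.485, 0.456, 0.406],
    image_std=[0.229, 0.224, 0.225],
)

# Missing std: rejected in one place for every model.
try:
    validate_preprocess_arguments(do_normalize=True, image_mean=[0.5, 0.5, 0.5], image_std=None)
except ValueError as err:
    print(err)  # image_mean and image_std must both be specified if do_normalize is True.
```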