Add batch equivalence tests, skip when center_crop is used

yonigozlan committed Dec 11, 2024
1 parent d5e23ea commit 867b1f5
Showing 5 changed files with 69 additions and 20 deletions.
src/transformers/image_processing_utils_fast.py: 1 addition & 1 deletion

@@ -814,7 +814,7 @@ def _pad_for_batching(
         """
         max_patch = max(len(x) for x in pixel_values)
         pixel_values = [
-            torch.nn.functional.pad(image, pad=[0, max_patch - image.shape[0], 0, 0, 0, 0, 0, 0])
+            torch.nn.functional.pad(image, pad=[0, 0, 0, 0, 0, 0, 0, max_patch - image.shape[0]])
             for image in pixel_values
         ]
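The reordering matters because torch.nn.functional.pad consumes its pad list from the last dimension backwards: the first pair of values pads dim -1, and for a 4D tensor the final pair pads dim 0. A minimal standalone sketch of the before/after behavior, assuming (num_patches, channels, height, width) inputs as in _pad_for_batching (the shapes here are illustrative only):

import torch

# torch.nn.functional.pad reads pad=[l, r, l, r, ...] starting at the LAST
# dimension, so for a 4D tensor the final pair targets dim 0 (num_patches).
image = torch.randn(3, 3, 16, 16)  # 3 patches of shape (3, 16, 16)
max_patch = 5

# Fixed ordering: pads the patch dimension up to max_patch.
fixed = torch.nn.functional.pad(image, pad=[0, 0, 0, 0, 0, 0, 0, max_patch - image.shape[0]])
print(fixed.shape)  # torch.Size([5, 3, 16, 16])

# Old ordering: silently padded the width dimension instead.
old = torch.nn.functional.pad(image, pad=[0, max_patch - image.shape[0], 0, 0, 0, 0, 0, 0])
print(old.shape)  # torch.Size([3, 3, 16, 18])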
tests/models/blip/test_image_processing_blip.py: 1 addition & 18 deletions

@@ -121,8 +121,7 @@ class BlipImageProcessingTestFourChannels(ImageProcessingTestMixin, unittest.TestCase):
 
     def setUp(self):
         super().setUp()
-        self.image_processor_tester = BlipImageProcessingTester(self, num_channels=4)
-        self.expected_encoded_image_num_channels = 3
+        self.image_processor_tester = BlipImageProcessingTester(self)
 
     @property
     def image_processor_dict(self):
@@ -137,19 +136,3 @@ def test_image_processor_properties(self):
         self.assertTrue(hasattr(image_processor, "image_mean"))
         self.assertTrue(hasattr(image_processor, "image_std"))
         self.assertTrue(hasattr(image_processor, "do_convert_rgb"))
-
-    @unittest.skip(reason="BlipImageProcessor does not support 4 channels yet")  # FIXME Amy
-    def test_call_numpy(self):
-        return super().test_call_numpy()
-
-    @unittest.skip(reason="BlipImageProcessor does not support 4 channels yet")  # FIXME Amy
-    def test_call_pytorch(self):
-        return super().test_call_torch()
-
-    @unittest.skip(reason="BLIP doesn't treat 4 channel PIL and numpy consistently yet")  # FIXME Amy
-    def test_call_pil(self):
-        pass
-
-    @unittest.skip(reason="BLIP doesn't treat 4 channel PIL and numpy consistently yet")  # FIXME Amy
-    def test_call_numpy_4_channels(self):
-        pass
tests/models/convnext/test_image_processing_convnext.py: 6 additions & 0 deletions

@@ -116,3 +116,9 @@ def test_image_processor_from_dict_with_kwargs(self):
 
             image_processor = image_processing_class.from_dict(self.image_processor_dict, size=42)
             self.assertEqual(image_processor.size, {"shortest_edge": 42})
+
+    @unittest.skip(
+        "Skipping as ConvNextImageProcessor uses center_crop and center_crop functions are not equivalent for fast and slow processors"
+    )
+    def test_slow_fast_equivalence_batched(self):
+        pass
tests/models/pixtral/test_image_processing_pixtral.py: 34 additions & 1 deletion

@@ -271,7 +271,40 @@ def test_slow_fast_equivalence(self):
         encoding_slow = image_processor_slow(dummy_image, return_tensors="pt")
         encoding_fast = image_processor_fast(dummy_image, return_tensors="pt")
 
-        self.assertTrue(torch.allclose(encoding_slow.pixel_values[0][0], encoding_fast.pixel_values[0][0], atol=1e-2))
+        self.assertTrue(torch.allclose(encoding_slow.pixel_values[0][0], encoding_fast.pixel_values[0][0], atol=1e-1))
+        self.assertLessEqual(
+            torch.mean(torch.abs(encoding_slow.pixel_values[0][0] - encoding_fast.pixel_values[0][0])).item(), 1e-3
+        )
+
+    @require_vision
+    @require_torch
+    def test_slow_fast_equivalence_batched(self):
+        dummy_images = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True)
+
+        if not self.test_slow_image_processor or not self.test_fast_image_processor:
+            self.skipTest(reason="Skipping slow/fast equivalence test")
+
+        if self.image_processing_class is None or self.fast_image_processing_class is None:
+            self.skipTest(reason="Skipping slow/fast equivalence test as one of the image processors is not defined")
+
+        if hasattr(self.image_processor_tester, "do_center_crop") and self.image_processor_tester.do_center_crop:
+            self.skipTest(
+                reason="Skipping as do_center_crop is True and center_crop functions are not equivalent for fast and slow processors"
+            )
+
+        image_processor_slow = self.image_processing_class(**self.image_processor_dict)
+        image_processor_fast = self.fast_image_processing_class(**self.image_processor_dict)
+
+        encoding_slow = image_processor_slow(dummy_images, return_tensors="pt")
+        encoding_fast = image_processor_fast(dummy_images, return_tensors="pt")
+
+        for i in range(len(encoding_slow.pixel_values)):
+            self.assertTrue(
+                torch.allclose(encoding_slow.pixel_values[i][0], encoding_fast.pixel_values[i][0], atol=1e-1)
+            )
+            self.assertLessEqual(
+                torch.mean(torch.abs(encoding_slow.pixel_values[i][0] - encoding_fast.pixel_values[i][0])).item(), 1e-3
+            )
 
     @unittest.skip(reason="PixtralImageProcessor doesn't treat 4 channel PIL and numpy consistently yet")  # FIXME Amy
     def test_call_numpy_4_channels(self):
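Pixtral processes images at variable resolutions, so a batched pixel_values output is a per-image sequence rather than one stacked tensor; that is why this version of the test compares image by image instead of calling torch.allclose once on the whole batch. A self-contained sketch of the pattern (the tensors and shapes below are made up for illustration):

import torch

# Stand-in for variable-resolution outputs: one tensor per input image.
slow_pixel_values = [torch.randn(1, 3, 64, 48), torch.randn(1, 3, 32, 80)]
fast_pixel_values = [t + 1e-4 for t in slow_pixel_values]  # tiny simulated drift

for slow, fast in zip(slow_pixel_values, fast_pixel_values):
    assert torch.allclose(slow[0], fast[0], atol=1e-1)
    assert torch.mean(torch.abs(slow[0] - fast[0])).item() <= 1e-3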
tests/test_image_processing_common.py: 27 additions & 0 deletions

@@ -178,6 +178,33 @@ def test_slow_fast_equivalence(self):
             torch.mean(torch.abs(encoding_slow.pixel_values - encoding_fast.pixel_values)).item(), 1e-3
         )
 
+    @require_vision
+    @require_torch
+    def test_slow_fast_equivalence_batched(self):
+        dummy_images = self.image_processor_tester.prepare_image_inputs(equal_resolution=False, torchify=True)
+
+        if not self.test_slow_image_processor or not self.test_fast_image_processor:
+            self.skipTest(reason="Skipping slow/fast equivalence test")
+
+        if self.image_processing_class is None or self.fast_image_processing_class is None:
+            self.skipTest(reason="Skipping slow/fast equivalence test as one of the image processors is not defined")
+
+        if hasattr(self.image_processor_tester, "do_center_crop") and self.image_processor_tester.do_center_crop:
+            self.skipTest(
+                reason="Skipping as do_center_crop is True and center_crop functions are not equivalent for fast and slow processors"
+            )
+
+        image_processor_slow = self.image_processing_class(**self.image_processor_dict)
+        image_processor_fast = self.fast_image_processing_class(**self.image_processor_dict)
+
+        encoding_slow = image_processor_slow(dummy_images, return_tensors="pt")
+        encoding_fast = image_processor_fast(dummy_images, return_tensors="pt")
+
+        self.assertTrue(torch.allclose(encoding_slow.pixel_values, encoding_fast.pixel_values, atol=1e-1))
+        self.assertLessEqual(
+            torch.mean(torch.abs(encoding_slow.pixel_values - encoding_fast.pixel_values)).item(), 1e-3
+        )
+
     @require_vision
     @require_torch
     def test_fast_is_faster_than_slow(self):
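The two assertions at the end of the new test encode a two-level tolerance: the loose element-wise bound (atol=1e-1) tolerates isolated pixel disagreements between the slow and fast pipelines, presumably from their different resizing backends, while the tight mean-absolute-difference bound (1e-3) still catches any systematic drift. A standalone sketch of the same idea (the helper name is hypothetical, not part of the test suite):

import torch

def assert_pixel_values_close(slow: torch.Tensor, fast: torch.Tensor) -> None:
    # Loose per-element bound: a few pixels may differ noticeably,
    # e.g. around interpolation boundaries.
    assert torch.allclose(slow, fast, atol=1e-1)
    # Tight aggregate bound: averaged over the whole tensor, the two
    # pipelines must stay nearly identical.
    assert torch.mean(torch.abs(slow - fast)).item() <= 1e-3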
