From 3c1fc533c43c6e3c1beb1b167925ddc73ef83f3d Mon Sep 17 00:00:00 2001
From: Miquel Farre
Date: Fri, 20 Dec 2024 19:12:49 +0000
Subject: [PATCH] adding tests

---
 .../idefics3/test_processor_idefics3.py      | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/tests/models/idefics3/test_processor_idefics3.py b/tests/models/idefics3/test_processor_idefics3.py
index 52d2f1539a4867..d315ce612fda8f 100644
--- a/tests/models/idefics3/test_processor_idefics3.py
+++ b/tests/models/idefics3/test_processor_idefics3.py
@@ -505,3 +505,75 @@ def test_unstructured_kwargs(self):
 
         self.assertEqual(inputs["pixel_values"].shape[3], 32)
         self.assertEqual(len(inputs["input_ids"][0]), 120)
+
+    @require_torch
+    @require_vision
+    def test_text_only_inference(self):
+        """Test that the processor works correctly with text-only input."""
+        processor = self.get_processor()
+
+        text = "This is a simple text without images."
+        inputs = processor(text=text)
+
+        tokenized_sentence = processor.tokenizer(text, add_special_tokens=False)
+        expected_input_ids = [[self.bos_token_id] + tokenized_sentence["input_ids"]]
+
+        self.assertEqual(inputs["input_ids"], expected_input_ids)
+        self.assertEqual(inputs["attention_mask"], [[1] * len(expected_input_ids[0])])
+        self.assertTrue("pixel_values" not in inputs)
+        self.assertTrue("pixel_attention_mask" not in inputs)
+
+        # Test batch of texts without image tokens
+        texts = ["First text.", "Second piece of text."]
+        batch_inputs = processor(text=texts, padding=True)
+
+        tokenized_1 = processor.tokenizer(texts[0], add_special_tokens=False)
+        tokenized_2 = processor.tokenizer(texts[1], add_special_tokens=False)
+
+        expected_1 = [self.bos_token_id] + tokenized_1["input_ids"]
+        expected_2 = [self.bos_token_id] + tokenized_2["input_ids"]
+
+        # Pad the shorter sequence
+        pad_len = len(expected_2) - len(expected_1)
+        if pad_len > 0:
+            padded_expected_1 = [self.padding_token_id] * pad_len + expected_1
+            expected_attention_1 = [0] * pad_len + [1] * len(expected_1)
+            self.assertEqual(batch_inputs["input_ids"], [padded_expected_1, expected_2])
+            self.assertEqual(batch_inputs["attention_mask"], [expected_attention_1, [1] * len(expected_2)])
+        else:
+            pad_len = -pad_len
+            padded_expected_2 = [self.padding_token_id] * pad_len + expected_2
+            expected_attention_2 = [0] * pad_len + [1] * len(expected_2)
+            self.assertEqual(batch_inputs["input_ids"], [expected_1, padded_expected_2])
+            self.assertEqual(batch_inputs["attention_mask"], [[1] * len(expected_1), expected_attention_2])
+
+    @require_torch
+    @require_vision
+    def test_missing_images_error(self):
+        """Test that appropriate error is raised when images are referenced but not provided."""
+        processor = self.get_processor()
+
+        # Test single text with image token but no image
+        text = "Let me show you this image: <image> What do you think?"
+        with self.assertRaises(ValueError) as context:
+            processor(text=text)
+        self.assertTrue("Number of images" in str(context.exception))
+
+        # Test batch with image tokens but no images
+        texts = [
+            "First text with <image> token.",
+            "Second text with <image> token.",
+        ]
+        with self.assertRaises(ValueError) as context:
+            processor(text=texts)
+        self.assertTrue("Number of images" in str(context.exception))
+
+        # Test with empty images list
+        with self.assertRaises(ValueError) as context:
+            processor(text=text, images=[])
+        self.assertTrue("Number of images" in str(context.exception))
+
+        # Test with batch and empty images lists
+        with self.assertRaises(ValueError) as context:
+            processor(text=texts, images=[[], []])
+        self.assertTrue("Number of images" in str(context.exception))
\ No newline at end of file