Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
mfarre committed Dec 20, 2024
1 parent 3c1fc53 commit b83ffc0
Showing 1 changed file with 15 additions and 15 deletions.
30 changes: 15 additions & 15 deletions tests/models/idefics3/test_processor_idefics3.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,28 +511,28 @@ def test_unstructured_kwargs(self):
def test_text_only_inference(self):
"""Test that the processor works correctly with text-only input."""
processor = self.get_processor()

text = "This is a simple text without images."
inputs = processor(text=text)

tokenized_sentence = processor.tokenizer(text, add_special_tokens=False)
expected_input_ids = [[self.bos_token_id] + tokenized_sentence["input_ids"]]

self.assertEqual(inputs["input_ids"], expected_input_ids)
self.assertEqual(inputs["attention_mask"], [[1] * len(expected_input_ids[0])])
self.assertTrue("pixel_values" not in inputs)
self.assertTrue("pixel_attention_mask" not in inputs)

# Test batch of texts without image tokens
texts = ["First text.", "Second piece of text."]
batch_inputs = processor(text=texts, padding=True)

tokenized_1 = processor.tokenizer(texts[0], add_special_tokens=False)
tokenized_2 = processor.tokenizer(texts[1], add_special_tokens=False)

expected_1 = [self.bos_token_id] + tokenized_1["input_ids"]
expected_2 = [self.bos_token_id] + tokenized_2["input_ids"]

# Pad the shorter sequence
pad_len = len(expected_2) - len(expected_1)
if pad_len > 0:
Expand All @@ -552,28 +552,28 @@ def test_text_only_inference(self):
def test_missing_images_error(self):
"""Test that appropriate error is raised when images are referenced but not provided."""
processor = self.get_processor()

# Test single text with image token but no image
text = "Let me show you this image: <image> What do you think?"
with self.assertRaises(ValueError) as context:
processor(text=text)
self.assertTrue("Number of images" in str(context.exception))
self.assertTrue("tokens in the text but no images were passed" in str(context.exception))

# Test batch with image tokens but no images
texts = [
"First text with <image> token.",
"Second text <image> with token.",
]
with self.assertRaises(ValueError) as context:
processor(text=texts)
self.assertTrue("Number of images" in str(context.exception))
self.assertTrue("tokens in the text but no images were passed" in str(context.exception))

# Test with empty images list
with self.assertRaises(ValueError) as context:
processor(text=text, images=[])
self.assertTrue("Number of images" in str(context.exception))
self.assertTrue("tokens in the text but no images were passed" in str(context.exception))

# Test with batch and empty images lists
with self.assertRaises(ValueError) as context:
processor(text=texts, images=[[], []])
self.assertTrue("Number of images" in str(context.exception))
self.assertTrue("tokens in the text but no images were passed" in str(context.exception))

0 comments on commit b83ffc0

Please sign in to comment.