diff --git a/tests/models/kosmos2_5/test_processor_kosmos2_5.py b/tests/models/kosmos2_5/test_processor_kosmos2_5.py
index a2e97d6ee5e3fd..21be585131bb91 100644
--- a/tests/models/kosmos2_5/test_processor_kosmos2_5.py
+++ b/tests/models/kosmos2_5/test_processor_kosmos2_5.py
@@ -152,6 +152,35 @@ def test_model_input_names(self):
         with pytest.raises(ValueError):
             processor()
 
+    @require_torch
+    @require_vision
+    def test_structured_kwargs_nested(self):
+        # Rewritten because the KOSMOS-2.5 processor doesn't use `rescale_factor`
+        if "image_processor" not in self.processor_class.attributes:
+            self.skipTest(f"image_processor attribute not present in {self.processor_class}")
+        image_processor = self.get_component("image_processor")
+        tokenizer = self.get_component("tokenizer")
+
+        processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor)
+        self.skip_processor_without_typed_kwargs(processor)
+
+        input_str = self.prepare_text_inputs()
+        image_input = self.prepare_image_inputs()
+
+        # Define the kwargs for each modality
+        all_kwargs = {
+            "common_kwargs": {"return_tensors": "pt"},
+            "images_kwargs": {"max_patches": 1024},
+            "text_kwargs": {"padding": "max_length", "max_length": 76},
+        }
+
+        inputs = processor(text=input_str, images=image_input, **all_kwargs)
+        self.skip_processor_without_typed_kwargs(processor)
+
+        self.assertEqual(inputs["flattened_patches"].shape[1], 1024)
+
+        self.assertEqual(len(inputs["input_ids"][0]), 76)
+
     @require_torch
     def test_full_processor(self):
         url = "https://huggingface.co/kirp/kosmos2_5/resolve/main/receipt_00008.png"