From fc9a790dea6c9829d7fbd5f37d115934ba58c1ab Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 18 Apr 2024 15:41:30 +0200
Subject: [PATCH 1/2] Fix test with exif_transpose image

---
 tests/utils/test_image_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/utils/test_image_utils.py b/tests/utils/test_image_utils.py
index ee45300a7e5279..49c88afebcbf71 100644
--- a/tests/utils/test_image_utils.py
+++ b/tests/utils/test_image_utils.py
@@ -21,6 +21,7 @@
 import numpy as np
 import pytest
 from huggingface_hub.file_download import http_get
+from packaging import version
 from requests import ConnectTimeout, ReadTimeout
 
 from tests.pipelines.test_pipelines_document_question_answering import INVOICE_URL
@@ -586,7 +587,7 @@ def test_load_img_exif_transpose(self):
 
         self.assertEqual(
             img_arr_without_exif_transpose.shape,
-            (333, 500, 3),
+            (500, 333, 3) if version.parse(datasets.__version__) > version.parse("2.18.0") else (333, 500, 3),
         )
 
         img_with_exif_transpose = load_image(dataset[3]["image"])

From 35f0e84e06233cd1b2cd8af15e8b15fc03f1ca0b Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 18 Apr 2024 17:03:30 +0200
Subject: [PATCH 2/2] Replace datasets with PIL to load image in tests

---
Review note (not part of the commit message): get_image_from_hub_dataset now
passes a timeout to requests.get and calls raise_for_status(), so a stalled or
failed download fails the test promptly instead of hanging the run or handing
an HTTP error page to PIL. The helper also gains a docstring. Hunk offsets and
the diffstat are adjusted for the four added lines; the index line still
records the original blob ids.

 tests/utils/test_image_utils.py | 43 +++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/tests/utils/test_image_utils.py b/tests/utils/test_image_utils.py
index 49c88afebcbf71..d6bc9a37585899 100644
--- a/tests/utils/test_image_utils.py
+++ b/tests/utils/test_image_utils.py
@@ -16,12 +16,13 @@
 import os
 import tempfile
 import unittest
+from io import BytesIO
+from typing import Optional
 
-import datasets
 import numpy as np
 import pytest
-from huggingface_hub.file_download import http_get
-from packaging import version
+import requests
+from huggingface_hub.file_download import hf_hub_url, http_get
 from requests import ConnectTimeout, ReadTimeout
 
 from tests.pipelines.test_pipelines_document_question_answering import INVOICE_URL
@@ -40,6 +41,15 @@
     from transformers.image_utils import get_image_size, infer_channel_dimension_format, load_image
 
 
+def get_image_from_hub_dataset(dataset_id: str, filename: str, revision: Optional[str] = None) -> "PIL.Image.Image":
+    """Download `filename` from a Hub dataset repo (optionally at `revision`) and open it as a PIL image."""
+    url = hf_hub_url(dataset_id, filename, repo_type="dataset", revision=revision)
+    # Bound the request and fail on HTTP errors so PIL never tries to decode an error page.
+    response = requests.get(url, timeout=10)
+    response.raise_for_status()
+    return PIL.Image.open(BytesIO(response.content))
+
+
 def get_random_image(height, width):
     random_array = np.random.randint(0, 256, (height, width, 3), dtype=np.uint8)
     return PIL.Image.fromarray(random_array)
@@ -541,9 +551,11 @@ def test_load_img_base64(self):
     def test_load_img_rgba(self):
         # we use revision="refs/pr/1" until the PR is merged
         # https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
-        dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1")
+        img = get_image_from_hub_dataset(
+            "hf-internal-testing/fixtures_image_utils", "0-test-lena.png", revision="refs/pr/1"
+        )
 
-        img = load_image(dataset[0]["image"])  # img with mode RGBA
+        img = load_image(img)  # img with mode RGBA
         img_arr = np.array(img)
 
         self.assertEqual(
@@ -554,9 +566,11 @@ def test_load_img_rgba(self):
     def test_load_img_la(self):
         # we use revision="refs/pr/1" until the PR is merged
         # https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
-        dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1")
+        img = get_image_from_hub_dataset(
+            "hf-internal-testing/fixtures_image_utils", "1-test-parrots.png", revision="refs/pr/1"
+        )
 
-        img = load_image(dataset[1]["image"])  # img with mode LA
+        img = load_image(img)  # img with mode LA
         img_arr = np.array(img)
 
         self.assertEqual(
@@ -567,9 +581,11 @@ def test_load_img_la(self):
     def test_load_img_l(self):
         # we use revision="refs/pr/1" until the PR is merged
         # https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
-        dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1")
+        img = get_image_from_hub_dataset(
+            "hf-internal-testing/fixtures_image_utils", "2-test-tree.png", revision="refs/pr/1"
+        )
 
-        img = load_image(dataset[2]["image"])  # img with mode L
+        img = load_image(img)  # img with mode L
         img_arr = np.array(img)
 
         self.assertEqual(
@@ -580,17 +596,18 @@ def test_load_img_l(self):
 
     def test_load_img_exif_transpose(self):
         # we use revision="refs/pr/1" until the PR is merged
         # https://hf.co/datasets/hf-internal-testing/fixtures_image_utils/discussions/1
-        dataset = datasets.load_dataset("hf-internal-testing/fixtures_image_utils", split="test", revision="refs/pr/1")
-        img_without_exif_transpose = dataset[3]["image"]
+        img_without_exif_transpose = get_image_from_hub_dataset(
+            "hf-internal-testing/fixtures_image_utils", "3-test-cat-rotated.jpg", revision="refs/pr/1"
+        )
         img_arr_without_exif_transpose = np.array(img_without_exif_transpose)
 
         self.assertEqual(
             img_arr_without_exif_transpose.shape,
-            (500, 333, 3) if version.parse(datasets.__version__) > version.parse("2.18.0") else (333, 500, 3),
+            (333, 500, 3),
         )
 
-        img_with_exif_transpose = load_image(dataset[3]["image"])
+        img_with_exif_transpose = load_image(img_without_exif_transpose)
         img_arr_with_exif_transpose = np.array(img_with_exif_transpose)
 
         self.assertEqual(