Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup: _image.py and _utils.py file in data subpackage #1707

Merged
merged 4 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 34 additions & 5 deletions lightly/data/_image_loaders.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""torchvision image loaders
(see https://pytorch.org/docs/stable/_modules/torchvision/datasets/folder.html)
"""Module for handling image loading in torchvision-compatible format.

This module provides image loading functionality similar to torchvision's implementation
(see https://pytorch.org/vision/main/generated/torchvision.datasets.ImageFolder.html)
"""

# Copyright (c) 2020. Lightly AG and its affiliates.
Expand All @@ -9,15 +10,33 @@
from PIL import Image


def pil_loader(path):
def pil_loader(path: str) -> Image.Image:
"""Loads an image using PIL.

Args:
path: Path to the image file.

Returns:
A PIL Image in RGB format.
"""
# open path as file to avoid ResourceWarning
# (https://github.com/python-pillow/Pillow/issues/835)
with open(path, "rb") as f:
img = Image.open(f)
return img.convert("RGB")


def accimage_loader(path):
def accimage_loader(path: str) -> Image.Image:
"""Loads an image using the accimage library for faster loading.

Falls back to PIL loader if accimage fails to load the image.

Args:
path: Path to the image file.

Returns:
An image loaded by accimage, or by PIL if accimage fails to load it.
"""
try:
import accimage

Expand All @@ -27,7 +46,17 @@ def accimage_loader(path):
return pil_loader(path)


def default_loader(path):
def default_loader(path: str) -> Image.Image:
"""Loads an image using the default backend specified in torchvision.

Uses accimage if available and configured as the backend, otherwise falls back to PIL.

Args:
path: Path to the image file.

Returns:
An image loaded by either accimage or PIL depending on the backend.
"""
from torchvision import get_image_backend

if get_image_backend() == "accimage":
Expand Down
34 changes: 28 additions & 6 deletions lightly/data/_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
""" Check for Corrupt Images """
"""Provides functionality to identify corrupt images in a directory.

This module helps users identify corrupt or unreadable image files within a specified
directory. It uses parallel processing to efficiently scan through large collections
of images.
"""

# Copyright (c) 2020. Lightly AG and its affiliates.
# All Rights Reserved

import os
from typing import *
from typing import List, Tuple

import tqdm.contrib.concurrent as concurrent
from PIL import Image, UnidentifiedImageError
Expand All @@ -13,18 +18,35 @@


def check_images(data_dir: str) -> Tuple[List[str], List[str]]:
"""Iterate through a directory of images and find corrupt images
"""Identifies corrupt and healthy images in the specified directory.

The function attempts to open each image file in the directory to verify
its integrity. It processes images in parallel for better performance.

Args:
data_dir: Path to the directory containing the images
data_dir: Directory path containing the image files to check.

Returns:
(healthy_images, corrupt_images)
A tuple containing two lists:
- List of filenames of healthy images that can be opened successfully
- List of filenames of corrupt images that cannot be opened

Example:
>>> healthy, corrupt = check_images("path/to/images")
>>> print(f"Found {len(corrupt)} corrupt images")
"""
dataset = LightlyDataset(input_dir=data_dir)
filenames = dataset.get_filenames()

def _is_corrupt(filename):
def _is_corrupt(filename: str) -> bool:
"""Checks if a single image file is corrupt.

Args:
filename: Name of the image file to check.

Returns:
True if the image is corrupt, False otherwise.
"""
try:
image = Image.open(os.path.join(data_dir, filename))
image.load()
Expand Down