From 267f90582f0f513f0faf22ad59360a772b833ea9 Mon Sep 17 00:00:00 2001 From: ChiragAgg5k Date: Wed, 23 Oct 2024 22:19:00 +0530 Subject: [PATCH 1/2] cleanup: _image_loaders.py in data subpackage --- lightly/data/_image_loaders.py | 39 +++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/lightly/data/_image_loaders.py b/lightly/data/_image_loaders.py index d7680de8e..5335fefbd 100644 --- a/lightly/data/_image_loaders.py +++ b/lightly/data/_image_loaders.py @@ -1,6 +1,7 @@ -"""torchvision image loaders -(see https://pytorch.org/docs/stable/_modules/torchvision/datasets/folder.html) +"""Module for handling image loading in torchvision-compatible format. +This module provides image loading functionality similar to torchvision's implementation +(see https://pytorch.org/vision/main/generated/torchvision.datasets.ImageFolder.html) """ # Copyright (c) 2020. Lightly AG and its affiliates. @@ -9,7 +10,15 @@ from PIL import Image -def pil_loader(path): +def pil_loader(path: str) -> Image.Image: + """Loads an image using PIL. + + Args: + path: Path to the image file. + + Returns: + A PIL Image in RGB format. + """ # open path as file to avoid ResourceWarning # (https://github.com/python-pillow/Pillow/issues/835) with open(path, "rb") as f: @@ -17,7 +26,17 @@ def pil_loader(path): return img.convert("RGB") -def accimage_loader(path): +def accimage_loader(path: str) -> Image.Image: + """Loads an image using the accimage library for faster loading. + + Falls back to PIL loader if accimage fails to load the image. + + Args: + path: Path to the image file. + + Returns: + An image loaded either by accimage or PIL in case of failure. + """ try: import accimage @@ -27,7 +46,17 @@ def accimage_loader(path): return pil_loader(path) -def default_loader(path): +def default_loader(path: str) -> Image.Image: + """Loads an image using the default backend specified in torchvision. + + Uses accimage if available and configured as the backend, otherwise falls back to PIL. + + Args: + path: Path to the image file. + + Returns: + An image loaded by either accimage or PIL depending on the backend. + """ from torchvision import get_image_backend if get_image_backend() == "accimage": From 197d8a919057dd922351369f1624d0665a636059 Mon Sep 17 00:00:00 2001 From: ChiragAgg5k Date: Wed, 23 Oct 2024 22:21:09 +0530 Subject: [PATCH 2/2] cleanup: _utils.py in data subpackage --- lightly/data/_utils.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/lightly/data/_utils.py b/lightly/data/_utils.py index 86ea687b0..6d49b59be 100644 --- a/lightly/data/_utils.py +++ b/lightly/data/_utils.py @@ -1,10 +1,15 @@ -""" Check for Corrupt Images """ +"""Provides functionality to identify corrupt images in a directory. + +This module helps users identify corrupt or unreadable image files within a specified +directory. It uses parallel processing to efficiently scan through large collections +of images. +""" # Copyright (c) 2020. Lightly AG and its affiliates. # All Rights Reserved import os -from typing import * +from typing import List, Tuple import tqdm.contrib.concurrent as concurrent from PIL import Image, UnidentifiedImageError @@ -13,18 +18,35 @@ def check_images(data_dir: str) -> Tuple[List[str], List[str]]: - """Iterate through a directory of images and find corrupt images + """Identifies corrupt and healthy images in the specified directory. + + The function attempts to open each image file in the directory to verify + its integrity. It processes images in parallel for better performance. Args: - data_dir: Path to the directory containing the images + data_dir: Directory path containing the image files to check. Returns: - (healthy_images, corrupt_images) + A tuple containing two lists: + - List of filenames of healthy images that can be opened successfully + - List of filenames of corrupt images that cannot be opened + + Example: + >>> healthy, corrupt = check_images("path/to/images") + >>> print(f"Found {len(corrupt)} corrupt images") """ dataset = LightlyDataset(input_dir=data_dir) filenames = dataset.get_filenames() - def _is_corrupt(filename): + def _is_corrupt(filename: str) -> bool: + """Checks if a single image file is corrupt. + + Args: + filename: Name of the image file to check. + + Returns: + True if the image is corrupt, False otherwise. + """ try: image = Image.open(os.path.join(data_dir, filename)) image.load()