Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of GaussianMixtureMasksTransform #1692

Merged
merged 5 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lightly/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from lightly.transforms.dino_transform import DINOTransform, DINOViewTransform
from lightly.transforms.fast_siam_transform import FastSiamTransform
from lightly.transforms.gaussian_blur import GaussianBlur
from lightly.transforms.gaussian_mixture_masks_transform import GaussianMixtureMask
from lightly.transforms.irfft2d_transform import IRFFT2DTransform
from lightly.transforms.jigsaw import Jigsaw
from lightly.transforms.mae_transform import MAETransform
Expand Down
107 changes: 107 additions & 0 deletions lightly/transforms/gaussian_mixture_masks_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from typing import Tuple

import torch
import torch.fft
from torch import Tensor

from lightly.transforms.irfft2d_transform import IRFFT2DTransform
snehilchatterjee marked this conversation as resolved.
Show resolved Hide resolved


class GaussianMixtureMask:
    """Applies a Gaussian mixture mask to a frequency-domain image.

    The mask starts at one everywhere. Gaussian kernels with random centers and
    random per-axis standard deviations are subtracted from it, the
    frequency-domain image is multiplied by the resulting mask, and the product
    is brought back to the spatial domain with IRFFT2D. The same mask is
    applied to every channel of the image.

    Attributes:
        num_gaussians: Number of Gaussian kernels subtracted from the mask.
        std_range: Tuple containing the minimum and maximum standard deviation
            of the Gaussians.
    """

    def __init__(
        self, num_gaussians: int = 20, std_range: Tuple[float, float] = (10, 15)
    ) -> None:
        """Initializes GaussianMixtureMask with the given parameters.

        Args:
            num_gaussians: Number of Gaussian kernels subtracted from the mask.
            std_range: Tuple containing the minimum and maximum standard
                deviation of the Gaussians.
        """
        self.num_gaussians = num_gaussians
        self.std_range = std_range

    def gaussian_kernel(
        self, size: Tuple[int, int], sigma: Tensor, center: Tensor
    ) -> Tensor:
        """Generates a 2D Gaussian kernel with peak value one.

        Args:
            size: Dimensions (H, W) of the kernel.
            sigma: Tensor with two entries, the standard deviation along the
                first (H) and second (W) axis. The kernel is created on the
                device of this tensor.
            center: Tensor with two entries (u0, v0), the position of the peak
                along the first and second axis.

        Returns:
            Tensor of shape (H, W) containing the Gaussian kernel.
        """
        # A column of row indices broadcast against a row of column indices
        # spans the same (H, W) grid as an "ij"-indexed meshgrid. This avoids
        # the deprecation warning torch.meshgrid emits when called without
        # `indexing` and creates the indices directly on the target device.
        u = torch.arange(size[0], device=sigma.device).unsqueeze(1)
        v = torch.arange(size[1], device=sigma.device).unsqueeze(0)
        u0, v0 = center

        row_term = (u - u0) ** 2 / (2 * sigma[0] ** 2)
        col_term = (v - v0) ** 2 / (2 * sigma[1] ** 2)
        gaussian: Tensor = torch.exp(-(row_term + col_term))

        return gaussian

    def apply_gaussian_mixture_mask(
        self, freq_image: Tensor, num_gaussians: int, std: Tuple[float, float]
    ) -> Tensor:
        """Applies the Gaussian mixture mask to a frequency-domain image.

        Args:
            freq_image: Tensor representing the frequency-domain image of shape
                (C, H, W//2+1). Only the last two dimensions determine the mask
                size; the mask is broadcast over any leading dimensions.
            num_gaussians: Number of Gaussian kernels subtracted from the mask.
            std: Tuple specifying the (min, max) range from which the standard
                deviations of the Gaussians are drawn uniformly.

        Returns:
            Tensor: Absolute value of the spatial-domain image obtained after
            masking. The spatial width is reconstructed as 2 * (W_f - 1) where
            W_f is the last dimension of freq_image.
        """
        device = freq_image.device
        height, freq_width = freq_image.shape[-2:]

        # The one-sided spectrum has W//2+1 columns, so the spatial width is
        # recovered as 2 * (W_f - 1).
        original_shape = (height, 2 * (freq_width - 1))

        # The mask is identical for all channels, so it is built once in 2D and
        # broadcast during the multiplication below.
        mask = freq_image.new_ones((height, freq_width))
        std_min, std_max = std

        for _ in range(num_gaussians):
            u0 = torch.randint(0, height, (1,), device=device)
            v0 = torch.randint(0, freq_width, (1,), device=device)
            # Concatenate on-device instead of rebuilding a tensor from a tuple
            # of one-element tensors, which would extract Python scalars first.
            center = torch.cat((u0, v0))
            sigma = torch.rand(2, device=device) * (std_max - std_min) + std_min

            mask -= self.gaussian_kernel((height, freq_width), sigma, center)

        # Use a local transform rather than storing it on the instance so that
        # calling the transform does not mutate shared state.
        irfft2d_transform = IRFFT2DTransform(original_shape)
        filtered_image: Tensor = irfft2d_transform(freq_image * mask).abs()
        return filtered_image

    def __call__(self, freq_image: Tensor) -> Tensor:
        """Applies the Gaussian mixture mask transformation to the input.

        Args:
            freq_image: Tensor representing a frequency-domain image of shape
                (C, H, W//2+1).

        Returns:
            Tensor: The spatial-domain image after applying the Gaussian
            mixture mask configured by num_gaussians and std_range.
        """
        transformed_channel: Tensor = self.apply_gaussian_mixture_mask(
            freq_image, self.num_gaussians, self.std_range
        )
        return transformed_channel
snehilchatterjee marked this conversation as resolved.
Show resolved Hide resolved
10 changes: 10 additions & 0 deletions tests/transforms/test_gaussian_mixture_masks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import torch

from lightly.transforms import GaussianMixtureMask


def test() -> None:
    """Checks that the transform maps a (3, 32, 17) spectrum to a (3, 32, 32) image."""
    freq_image = torch.rand(3, 32, 17)
    mask_transform = GaussianMixtureMask(20, (10, 15))
    spatial_image = mask_transform(freq_image)
    expected_shape = (3, 32, 32)
    assert spatial_image.shape == expected_shape
snehilchatterjee marked this conversation as resolved.
Show resolved Hide resolved