Skip to content

Commit

Permalink
Anomaly images (#227)
Browse files Browse the repository at this point in the history
* Fixed bugs

* Fixed bugs

* Added auto config creation from template

* Removed tippy

* Improved callback setting js

* Added config examples for different models and tasks

* Added stratified split for mnist classification task

* added anomaly detection for images

* added anomaly detection for images

* fixed augmentations, uploaded data

* Fixed error

* Fixed sonar qube alerts

* Fixed sonar alerts

* Added empty config file

* Added empty config file

* Changed experiments test logic

* Changed experiments test logic

* remove duplicated code

* fix for inference results saving

* Added fixes for brain segmentation

* Added fixes for brain segmentation

* Fixed str Error

* Added inference for UI + Added Dockerfiles and configs for demo

* Fixed errors with ultralytics and qsar

* Fixed yolo train saving dir

* Fixed yolo pretraine errors

* Updated dockerfiles in order for docker to be used with gpu

* Fixed errors and improved Dockerfiles

* Corrected ckpt paths

* visualize results

* Removed gpus from dockerfile

* tests

* data folder fix

* Changed model for stroke segmentation

* Added prom med and nm configs to demo folders

* update in architecture and experiment config

* Mmdetection3d integration

* Update pyproject and fixed yolo DDP error

* Fixed yolo error

* Returned torch 1.11 version as newer ones are not compatible with segmodelspytorch

* Updated poetry lock

* Updated poetry in Dockerfile

* Removed installation of reqs in runtests.sh

* Modified Dockerfile

* Modified Dockerfile

* Updated poetry lock

* Updated poetry lock

* Updated poetry lock

* Fixed sonar alerts

---------

Co-authored-by: InnopolisU <[email protected]>
  • Loading branch information
KGallyamov and BarzaH authored Sep 16, 2024
1 parent 350914a commit e0b545d
Show file tree
Hide file tree
Showing 17 changed files with 490 additions and 5 deletions.
25 changes: 25 additions & 0 deletions config/datasets/anomaly_detection_images.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
task:
- anomaly-detection-images

name: MVTEC
description: "
The MVTec anomaly detection dataset (MVTec AD)
https://www.mvtec.com/company/research/datasets/mvtec-ad
DOI: 10.1007/s11263-020-01400-4
DOI: 10.1109/CVPR.2019.00982
https://www.mvtec.com/company/research/datasets/mvtec-ad"
markup_info: 'Train images do not contain anomalies'
date_time: 20.07.2024

_target_: innofw.core.datamodules.lightning_datamodules.anomaly_detection_images.ImageAnomaliesLightningDataModule

train:
source: https://api.blackhole.ai.innopolis.university/public-datasets/anomaly_detection_mvtec/train.zip
target: ./data/MVTEC/train
test:
source: https://api.blackhole.ai.innopolis.university/public-datasets/anomaly_detection_mvtec/test.zip
target: ./data/MVTEC/test

infer:
source: https://api.blackhole.ai.innopolis.university/public-datasets/anomaly_detection_mvtec/test.zip
target: ./data/MVTEC/test
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# @package _global_
defaults:
- override /models: anomaly-detection/cae
- override /datasets: anomaly_detection_images
- override /optimizers: adam
- override /augmentations_train: none
- override /augmentations_val: none
- override /augmentations_test: none
- override /losses: mse


project: "anomaly-detection-mvtec"
task: "anomaly-detection-images"
random_seed: 0
epochs: 50
batch_size: 8
accelerator: gpu

wandb:
enable: True
project: anomaly_detect_mvtec
entity: "k-galliamov"
group: none
job_type: training
1 change: 1 addition & 0 deletions config/losses/mse.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ name: MSE
description: Mean squared error measures the average of the squares of the errors
task:
- regression
- anomaly-detection-images

implementations:
sklearn:
Expand Down
4 changes: 4 additions & 0 deletions config/models/anomaly-detection/cae.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
name: convolutional AE
_target_: innofw.core.models.torch.architectures.autoencoders.convolutional_ae.CAE
description: Convolutional autoencoder based on Unet with 3 channels input, used for image anomaly detection
anomaly_threshold: 0.05
3 changes: 2 additions & 1 deletion innofw/core/datamodules/lightning_datamodules/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .image_folder_dm import ImageLightningDataModule
from .qsar_dm import QsarSelfiesDataModule
from .semantic_segmentation.hdf5 import HDF5LightningDataModule
from .drugprot import DrugprotDataModule
from .drugprot import DrugprotDataModule
from .anomaly_detection_images import ImageAnomaliesLightningDataModule
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import os
import logging
import pathlib

import pandas as pd
import torch
import cv2
import numpy as np
from torch.utils.data import random_split

from innofw.constants import Frameworks
from innofw.constants import Stages
from innofw.core.datamodules.lightning_datamodules.base import (
BaseLightningDataModule,
)
from innofw.core.datasets.anomalies import AnomaliesDataset


class ImageAnomaliesLightningDataModule(BaseLightningDataModule):
    """
    A Class used for working with images in the anomaly detection task
    ...
    The train data is built without labels, while the test data is built
    with anomaly masks and is split into test and validation subsets.

    Attributes
    ----------
    aug : dict
        The list of augmentations
    val_size: float
        The proportion of the labelled test data to include in the validation set
    Methods
    -------
    setup_train_test_val():
        Builds the train, test and validation datasets
    setup_infer():
        The method prepares inference data
    predict_dataloader():
        Returns the dataloader over the inference data
    save_preds(out_batches, stage: Stages, dst_path: pathlib.Path):
        Saves predicted anomaly masks and their overlays as png files
    """

    task = ["anomaly-detection-images"]
    framework = [Frameworks.torch]

    def __init__(
        self,
        train,
        test,
        infer=None,
        batch_size: int = 2,
        val_size: float = 0.5,
        num_workers: int = 1,
        augmentations=None,
        stage=None,
        *args,
        **kwargs,
    ):
        super().__init__(
            train, test, infer, batch_size, num_workers, stage, *args, **kwargs
        )
        self.aug = augmentations
        self.val_size = val_size

    def setup_train_test_val(self, **kwargs):
        """Create the train dataset (images only) and the labelled test/val datasets."""
        self.train_dataset = AnomaliesDataset(
            self.train_source, self.get_aug(self.aug, 'train'), add_labels=False
        )
        self.test_dataset = AnomaliesDataset(
            self.test_source, self.get_aug(self.aug, 'test'), add_labels=True
        )

        # divide into train, val, test - val is a part of test since train does not have anomalies
        n = len(self.test_dataset)
        test_size = int(n * (1 - self.val_size))
        self.test_dataset, self.val_dataset = random_split(
            self.test_dataset, [test_size, n - test_size]
        )

    def predict_dataloader(self):
        """Return a dataloader over the dataset prepared by `setup_infer`."""
        return torch.utils.data.DataLoader(
            self.predict_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )

    def setup_infer(self):
        """Create the (unlabelled) inference dataset from the predict source."""
        self.predict_dataset = AnomaliesDataset(
            self.predict_source, self.get_aug(self.aug, 'test')
        )

    def save_preds(self, out_batches, stage: Stages, dst_path: pathlib.Path):
        """
        Save anomaly masks and their overlays on the input images.

        For every sample two files are written into `dst_path`/results:
        `out_<n>.png` with the mask (0/255) and `vis_<n>.png` with the mask
        blended into the image.

        out_batches: iterable of (images, masks) pairs, one pair per batch
        stage: not used by this method
        dst_path: folder in which the `results` folder is created
        """
        out_file_path = pathlib.Path(dst_path) / "results"
        # Repeated runs into the same destination (or a missing parent folder)
        # must not abort saving.
        out_file_path.mkdir(parents=True, exist_ok=True)
        n = 0
        for batch in out_batches:
            for img, pred in zip(batch[0], batch[1]):
                img = img.cpu().numpy()
                # The mask may still live on the accelerator, same as the image.
                pred = pred.cpu().numpy() * 255
                if pred.dtype != np.uint8:
                    pred = pred.astype(np.uint8)
                # Build both names from the folder so that an "out_" substring
                # elsewhere in the path can never be rewritten.
                mask_path = out_file_path / f"out_{n}.png"
                vis_path = out_file_path / f"vis_{n}.png"
                n += 1
                cv2.imwrite(str(mask_path), pred)
                # Put the mask into channel 1 of a channel-first image;
                # assumes img is (C, H, W) with values in [0, 1] - TODO confirm
                mask_vis = np.zeros_like(img)
                mask_vis[1, :, :] = pred / 255
                blended = img * 255 * 0.75 + mask_vis * 255 * 0.25
                img_with_mask = blended.astype(np.uint8).transpose((1, 2, 0))
                # Swap the channel order before writing, cv2.imwrite expects BGR.
                img_with_mask = cv2.cvtColor(img_with_mask, cv2.COLOR_BGR2RGB)
                cv2.imwrite(str(vis_path), img_with_mask)
        logging.info(f"Saved result to: {out_file_path}")
51 changes: 51 additions & 0 deletions innofw/core/datasets/anomalies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from pathlib import Path

import cv2
import numpy as np
import torch
from torch.utils.data import Dataset


class AnomaliesDataset(Dataset):
    """
    A class to represent a dataset of images with optional anomaly masks.

    data_path: str or Path
        path to folder with structure:
            data_path/images/
            data_path/labels/ (optional)
        a path that ends with `images` or `labels` is replaced by its parent
    augmentations: transforms to apply on images
    add_labels: whether to return anomaly segmentation with the image

    Methods
    -------
    __getitem__(self, idx):
        returns the image, and the anomaly mask if `add_labels` is set
    """

    def __init__(self, data_path, augmentations, add_labels=False):
        # Accept plain strings as well, `.parent` below needs a Path.
        data_path = Path(data_path)
        if str(data_path).endswith(('images', 'labels')):
            data_path = data_path.parent
        # iterdir() yields entries in arbitrary order; sort both listings so
        # that images[i] and labels[i] refer to the same sample.
        # assumes an image and its mask sort to the same position - TODO confirm
        self.images = sorted((data_path / 'images').iterdir())
        self.add_labels = add_labels
        self.augmentations = augmentations
        if self.add_labels:
            self.labels = sorted((data_path / 'labels').iterdir())

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = self.images[idx]
        image = cv2.imread(str(image_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # float tensor scaled by 1/255
        image = torch.from_numpy(image).float()
        image = torch.div(image, 255)
        if not self.add_labels:
            if self.augmentations is not None:
                return self.augmentations(image)
            return image
        # flag 0 reads the mask as a single-channel grayscale image
        mask = cv2.imread(str(self.labels[idx]), 0)
        if self.augmentations is not None:
            image, mask = self.augmentations(image, mask)
        return image, mask
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import torch
import torch.nn as nn
from segmentation_models_pytorch import Unet


class CAE(nn.Module):
    """
    Convolutional autoencoder: a Unet that maps an image to a reconstruction
    with the same number of channels, squashed to [0, 1] by a sigmoid.

    anomaly_threshold: value stored on the model as `anomaly_threshold`;
        it is not used inside the forward pass
    input_channels: number of channels of the input image and of the
        reconstruction
    """

    def __init__(self, anomaly_threshold, input_channels=3):
        super().__init__()
        # Both ends of the network must follow `input_channels`; passing only
        # `classes` would leave the encoder expecting 3-channel input.
        self.model = Unet(
            in_channels=input_channels,
            classes=input_channels,
            activation='sigmoid',
        )
        self.anomaly_threshold = anomaly_threshold

    def forward(self, x):
        """Return the reconstruction of `x` produced by the Unet."""
        return self.model(x)


if __name__ == '__main__':
    # Smoke check: push a zero batch through the model and print the output shape.
    cae = CAE(0)
    dummy_batch = torch.zeros((10, 3, 512, 512))
    reconstruction = cae(dummy_batch)
    print(reconstruction.shape)
1 change: 1 addition & 0 deletions innofw/core/models/torch/lightning_modules/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .anomaly_detection_timeseries import (
AnomalyDetectionTimeSeriesLightningModule,
)
from .anomaly_detection_images import AnomalyDetectionImagesLightningModule
from .biobert_ner_model import BiobertNERModel
from .chemistry_vae import ChemistryVAEForwardLightningModule
from .chemistry_vae import ChemistryVAELightningModule
Expand Down
126 changes: 126 additions & 0 deletions innofw/core/models/torch/lightning_modules/anomaly_detection_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
from typing import Any

import torch
from torchmetrics import MetricCollection
from torchmetrics.classification import BinaryJaccardIndex, BinaryF1Score, BinaryPrecision, \
BinaryRecall
from torchmetrics.regression import MeanAbsoluteError, MeanSquaredError
from lovely_numpy import lo

from innofw.core.models.torch.lightning_modules.base import BaseLightningModule


class AnomalyDetectionImagesLightningModule(BaseLightningModule):
    """
    PyTorchLightning module for Anomaly Detection in images
    ...
    The model reconstructs the input image; pixels whose squared
    reconstruction error (summed over channels) reaches
    `model.anomaly_threshold` are marked as anomalous.

    Attributes
    ----------
    model : nn.Module
        model to train
    losses : losses
        loss to use while training
    optimizer_cfg : cfg
        optimizer configurations
    scheduler_cfg : cfg
        scheduler configuration
    Methods
    -------
    forward(x):
        returns result of prediction
    compute_anomaly_mask(x, x_rec=None):
        returns the boolean anomaly mask for a batch of images
    """

    def __init__(
        self,
        model,
        losses,
        optimizer_cfg,
        scheduler_cfg,
        *args: Any,
        **kwargs: Any,
    ):
        super().__init__(*args, **kwargs)
        self.model = model
        self.losses = losses
        self.optimizer_cfg = optimizer_cfg
        self.scheduler_cfg = scheduler_cfg

        # NOTE(review): the reconstruction loss is fixed to MSE here;
        # `losses` is stored but not used by this module.
        self.loss_fn = torch.nn.MSELoss()

        # Training has no masks, so it is tracked with reconstruction metrics.
        metrics = MetricCollection(
            [MeanSquaredError(), MeanAbsoluteError()]
        )
        self.train_metrics = metrics.clone(prefix='train')

        # Validation and test compare the predicted mask with the labelled one.
        segmentation_metrics = MetricCollection(
            [
                BinaryF1Score(),
                BinaryPrecision(),
                BinaryRecall(),
                BinaryJaccardIndex(),
            ]
        )
        self.val_metrics = segmentation_metrics.clone(prefix='val')
        self.test_metrics = segmentation_metrics.clone(prefix='test')

    def forward(self, x, *args, **kwargs) -> Any:
        return self.model(x.float())

    def training_step(self, x, batch_idx):
        x_rec = self.forward(x)
        loss = self.loss_fn(x, x_rec)
        metrics = self.compute_metrics('train', x_rec, x)
        self.log_metrics('train', metrics)
        self.log("train_loss", loss, on_step=False, on_epoch=True)
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        return self._evaluation_step('val', batch)

    def test_step(self, batch, batch_idx):
        return self._evaluation_step('test', batch)

    def _evaluation_step(self, stage, batch):
        """Shared body of validation/test: log the loss and mask metrics."""
        x, y = batch
        # Any non-zero mask value counts as an anomaly; the binary metrics
        # only accept 0/1 targets.
        y = y.bool()
        x_rec = self.forward(x)
        loss = self.loss_fn(x, x_rec)
        # Reuse the reconstruction instead of running the model a second time.
        mask = self.compute_anomaly_mask(x, x_rec)
        metrics = self.compute_metrics(stage, mask, y)
        self.log_metrics(stage, metrics)
        self.log(f"{stage}_loss", loss, on_step=False, on_epoch=True)
        return {"loss": loss}

    def predict_step(self, x, batch_idx, **kwargs):
        return (x, self.compute_anomaly_mask(x))

    def compute_anomaly_mask(self, x, x_rec=None):
        """
        Return a boolean mask of pixels whose reconstruction error is high.

        x: batch of images, (B, C, W, H)
        x_rec: reconstruction of `x`; computed with `forward` when omitted
        """
        if x_rec is None:
            x_rec = self.forward(x)  # (B, C, W, H)
        diff = ((x - x_rec) ** 2).sum(dim=1)  # sum across channels
        return diff >= self.model.anomaly_threshold

    def log_metrics(self, stage, metrics_res, *args, **kwargs):
        for key, value in metrics_res.items():
            self.log(key, value)

    def compute_metrics(self, stage, predictions, labels):
        """
        Update and return the metric collection of the given stage.

        For "train" `labels` is the input image and is compared to the
        reconstruction as is. For "val" and "test" `labels` is the anomaly
        mask, which is converted to integer class ids first.
        Raises ValueError for any other stage.
        """
        if stage == "train":
            # Casting the float image to long would zero almost every pixel.
            return self.train_metrics(predictions, labels)

        # Reshape labels from [B, 1, H, W] to [B, H, W]
        if labels.ndim == 4 and labels.shape[1] == 1:
            labels = labels.squeeze(1)
        labels = labels.type(dtype=torch.long)

        if stage == "val":
            return self.val_metrics(predictions, labels)
        if stage == "test":
            return self.test_metrics(predictions, labels)
        raise ValueError(f"Unknown stage: {stage}")
2 changes: 1 addition & 1 deletion innofw/core/models/torch_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def predict(self, datamodule, ckpt_path=None):
def train(self, data_module, ckpt_path=None):
self.trainer.fit(self.pl_module, data_module, ckpt_path=ckpt_path)

def test(self, data_module):
def test(self, data_module, ckpt_path=None):
    """
    Run the trainer's test loop for `self.pl_module` on `data_module`.

    data_module: data passed to `self.trainer.test`
    ckpt_path: optional checkpoint path forwarded to `self.trainer.test`,
        in the same way `train` forwards it to `self.trainer.fit`;
        None leaves the call equivalent to the previous behaviour
        # NOTE(review): presumably `trainer` is a Lightning Trainer - confirm
    Returns whatever `self.trainer.test` returns.
    """
    outputs = self.trainer.test(self.pl_module, data_module, ckpt_path=ckpt_path)
    return outputs

Expand Down
Loading

0 comments on commit e0b545d

Please sign in to comment.