Anomaly images (#227)

* Fixed bugs * Fixed bugs * Added auto config creation from template * Removed tippy * Improved callback setting js * Added config examples for different models and tasks * Added stratified split for mnist classification task * added anomaly detection for images * added anomaly detection for images * fixed augmentations, uploaded data * Fixed error * Fixed SonarQube alerts * Fixed sonar alerts * Added empty config file * Added empty config file * Changed experiments test logic * Changed experiments test logic * remove duplicated code * fix for inference results saving * Added fixes for brain segmentation * Added fixes for brain segmentation * Fixed str Error * Added inference for UI + Added Dockerfiles and configs for demo * Fixed errors with ultralytics and qsar * Fixed yolo train saving dir * Fixed yolo pretrained errors * Updated dockerfiles in order for docker to be used with gpu * Fixed errors and improved Dockerfiles * Corrected ckpt paths * visualize results * Removed gpus from dockerfile * tests * data folder fix * Changed model for stroke segmentation * Added prom med and nm configs to demo folders * update in architecture and experiment config * Mmdetection3d integration * Update pyproject and fixed yolo DDP error * Fixed yolo error * Returned torch 1.11 version as newer ones are not compatible with segmodelspytorch * Updated poetry lock * Updated poetry in Dockerfile * Removed installation of reqs in runtests.sh * Modified Dockerfile * Modified Dockerfile * Updated poetry lock * Updated poetry lock * Updated poetry lock * Fixed sonar alerts --------- Co-authored-by: InnopolisU <[email protected]>
InnopolisUni · Sep 16, 2024 · e0b545d · e0b545d
1 parent 350914a
commit e0b545d
Show file tree

Hide file tree

Showing 17 changed files with 490 additions and 5 deletions.
diff --git a/config/datasets/anomaly_detection_images.yaml b/config/datasets/anomaly_detection_images.yaml
@@ -0,0 +1,25 @@
+task:
+  - anomaly-detection-images
+
+name: MVTEC
+description: "
+The MVTec anomaly detection dataset (MVTec AD)
+https://www.mvtec.com/company/research/datasets/mvtec-ad
+DOI: 10.1007/s11263-020-01400-4
+DOI: 10.1109/CVPR.2019.00982
+https://www.mvtec.com/company/research/datasets/mvtec-ad"
+markup_info: 'Train images do not contain anomalies'
+date_time: 20.07.2024
+
+_target_: innofw.core.datamodules.lightning_datamodules.anomaly_detection_images.ImageAnomaliesLightningDataModule
+
+train:
+  source: https://api.blackhole.ai.innopolis.university/public-datasets/anomaly_detection_mvtec/train.zip
+  target: ./data/MVTEC/train
+test:
+  source: https://api.blackhole.ai.innopolis.university/public-datasets/anomaly_detection_mvtec/test.zip
+  target: ./data/MVTEC/test
+
+infer:
+  source: https://api.blackhole.ai.innopolis.university/public-datasets/anomaly_detection_mvtec/test.zip
+  target: ./data/MVTEC/test
diff --git a/config/experiments/anomaly-detection/KG_210724_ba083ak_anomaly_detection_images.yaml b/config/experiments/anomaly-detection/KG_210724_ba083ak_anomaly_detection_images.yaml
@@ -0,0 +1,24 @@
+# @package _global_
+defaults:
+  - override /models: anomaly-detection/cae
+  - override /datasets: anomaly_detection_images
+  - override /optimizers: adam
+  - override /augmentations_train: none
+  - override /augmentations_val: none
+  - override /augmentations_test: none
+  - override /losses: mse
+
+
+project: "anomaly-detection-mvtec"
+task: "anomaly-detection-images"
+random_seed: 0
+epochs: 50
+batch_size: 8
+accelerator: gpu
+
+wandb:
+  enable: True
+  project: anomaly_detect_mvtec
+  entity: "k-galliamov"
+  group: none
+  job_type: training
diff --git a/config/losses/mse.yaml b/config/losses/mse.yaml
@@ -2,6 +2,7 @@ name: MSE
 description: Mean squared error measures the average of the squares of the errors
 task:
   - regression
+  - anomaly-detection-images
 
 implementations:
   sklearn:

diff --git a/config/models/anomaly-detection/cae.yaml b/config/models/anomaly-detection/cae.yaml
@@ -0,0 +1,4 @@
+name: convolutional AE
+_target_: innofw.core.models.torch.architectures.autoencoders.convolutional_ae.CAE
+description: Unet-based convolutional autoencoder with 3 channels input, reconstructs images for anomaly detection
+anomaly_threshold: 0.05
diff --git a/innofw/core/datamodules/lightning_datamodules/__init__.py b/innofw/core/datamodules/lightning_datamodules/__init__.py
@@ -1,4 +1,5 @@
 from .image_folder_dm import ImageLightningDataModule
 from .qsar_dm import QsarSelfiesDataModule
 from .semantic_segmentation.hdf5 import HDF5LightningDataModule
-from .drugprot import DrugprotDataModule
+from .drugprot import DrugprotDataModule
+from .anomaly_detection_images import ImageAnomaliesLightningDataModule
diff --git a/innofw/core/datamodules/lightning_datamodules/anomaly_detection_images.py b/innofw/core/datamodules/lightning_datamodules/anomaly_detection_images.py
@@ -0,0 +1,105 @@
+import os
+import logging
+import pathlib
+
+import pandas as pd
+import torch
+import cv2
+import numpy as np
+from torch.utils.data import random_split
+
+from innofw.constants import Frameworks
+from innofw.constants import Stages
+from innofw.core.datamodules.lightning_datamodules.base import (
+    BaseLightningDataModule,
+)
+from innofw.core.datasets.anomalies import AnomaliesDataset
+
+
+class ImageAnomaliesLightningDataModule(BaseLightningDataModule):
+    """
+    Lightning data module for anomaly detection in images.
+
+    The train dataset is built without labels, while the labelled test
+    dataset is randomly split into a test part and a validation part.
+
+    Attributes
+    ----------
+    aug : dict
+        Augmentations passed through ``get_aug`` for the 'train' / 'test' stages
+    val_size: float
+        The proportion of the labelled test dataset to move into the validation set
+
+    Methods
+    -------
+    setup_train_test_val():
+        Builds the train, test and validation datasets
+
+    setup_infer():
+        The method prepares inference data
+
+    predict_dataloader():
+        Returns the dataloader over the inference dataset
+
+    save_preds(out_batches, stage: Stages, dst_path: pathlib.Path):
+        Saves predicted anomaly masks and their overlays on the inputs as png files
+
+    """
+
+    task = ["anomaly-detection-images"]
+    framework = [Frameworks.torch]
+
+    def __init__(
+            self,
+            train,
+            test,
+            infer=None,
+            batch_size: int = 2,
+            val_size: float = 0.5,
+            num_workers: int = 1,
+            augmentations=None,
+            stage=None,
+            *args,
+            **kwargs,
+    ):
+        super().__init__(
+            train, test, infer, batch_size, num_workers, stage, *args, **kwargs
+        )
+        self.aug = augmentations
+        self.val_size = val_size
+
+    def setup_train_test_val(self, **kwargs):
+        """Create the unlabelled train set and split the labelled test set into test/val."""
+        self.train_dataset = AnomaliesDataset(self.train_source, self.get_aug(self.aug, 'train'),
+                                              add_labels=False)
+        self.test_dataset = AnomaliesDataset(self.test_source, self.get_aug(self.aug, 'test'),
+                                             add_labels=True)
+
+        # divide into train, val, test - val is a part of test since train does not have anomalies
+        n = len(self.test_dataset)
+        test_size = int(n * (1 - self.val_size))
+        self.test_dataset, self.val_dataset = random_split(
+            self.test_dataset, [test_size, n - test_size]
+        )
+
+    def predict_dataloader(self):
+        """Return a dataloader over ``self.predict_dataset`` (created by ``setup_infer``)."""
+        return torch.utils.data.DataLoader(
+            self.predict_dataset,
+            batch_size=self.batch_size,
+            num_workers=self.num_workers,
+        )
+
+    def setup_infer(self):
+        """Create the unlabelled inference dataset using the 'test' augmentations."""
+        self.predict_dataset = AnomaliesDataset(self.predict_source, self.get_aug(self.aug, 'test'))
+
+    def save_preds(self, out_batches, stage: Stages, dst_path: pathlib.Path):
+        """
+        Save predicted anomaly masks and mask-over-image visualisations.
+
+        For every sample two files are written into ``dst_path / "results"``:
+        ``out_<n>.png`` (the mask scaled to 0/255) and ``vis_<n>.png``
+        (the input image blended with the mask painted into channel 1).
+
+        Parameters
+        ----------
+        out_batches :
+            iterable of batches where ``batch[0]`` holds the images and
+            ``batch[1]`` the matching masks; images are indexed as
+            (C, H, W) and masks as (H, W) by the code below
+        stage : Stages
+            not used by this method
+        dst_path : pathlib.Path
+            folder in which the ``results`` directory is created
+        """
+        out_file_path = pathlib.Path(dst_path) / "results"
+        # Re-running inference into the same folder must not fail on an existing directory
+        out_file_path.mkdir(parents=True, exist_ok=True)
+        n = 0
+        for batch in out_batches:
+            for img, pred in zip(batch[0], batch[1]):
+                img = img.detach().cpu().numpy()
+                # The mask may live on the GPU just like the image, so move it before numpy()
+                pred = pred.detach().cpu().numpy() * 255
+                if pred.dtype != np.uint8:
+                    pred = pred.astype(np.uint8)
+                # Build both names explicitly: replacing 'out_' in the full path
+                # would also rewrite matching directory names
+                mask_file = out_file_path / f"out_{n}.png"
+                vis_file = out_file_path / f"vis_{n}.png"
+                n += 1
+                # cv2.imwrite expects a string file name
+                cv2.imwrite(str(mask_file), pred)
+                mask_vis = np.zeros_like(img)
+                mask_vis[1, :, :] = pred / 255
+                img_with_mask = (img * 255 * 0.75 + mask_vis * 255 * 0.25).astype(np.uint8).transpose((1, 2, 0))
+                img_with_mask = cv2.cvtColor(img_with_mask, cv2.COLOR_BGR2RGB)
+                cv2.imwrite(str(vis_file), img_with_mask)
+        logging.info(f"Saved result to: {out_file_path}")
diff --git a/innofw/core/datasets/anomalies.py b/innofw/core/datasets/anomalies.py
@@ -0,0 +1,51 @@
+from pathlib import Path
+
+import cv2
+import numpy as np
+import torch
+from torch.utils.data import Dataset
+
+
+class AnomaliesDataset(Dataset):
+    """
+    A class to represent a dataset of images with optional anomaly masks.
+
+    data_path: str or Path
+        path to folder with structure:
+        data_path/images/
+        data_path/labels/ (optional)
+        a path that points at the images/ or labels/ folder itself is
+        replaced by its parent
+
+    augmentations: transforms to apply on images
+
+    add_labels: whether to return anomaly segmentation with the image
+
+    Images and labels are paired by position in the name-sorted folder
+    listings, so matching files are expected to sort identically.
+
+    Methods
+    -------
+    __getitem__(self, idx):
+        returns the image, and the mask as well when add_labels is set
+    """
+
+    def __init__(self, data_path, augmentations, add_labels=False):
+        # Accept plain strings as well as Path objects
+        data_path = Path(data_path)
+        if str(data_path).endswith(('images', 'labels')):
+            data_path = data_path.parent
+        # iterdir() yields entries in arbitrary order; sort so that images and
+        # labels are paired deterministically
+        self.images = sorted((data_path / 'images').iterdir())
+        self.add_labels = add_labels
+        self.augmentations = augmentations
+        if self.add_labels:
+            self.labels = sorted((data_path / 'labels').iterdir())
+
+    def __len__(self):
+        return len(self.images)
+
+    def __getitem__(self, idx):
+        image_path = self.images[idx]
+        image = cv2.imread(str(image_path))
+        if image is None:
+            # cv2.imread signals failure by returning None instead of raising
+            raise FileNotFoundError(f"Could not read image: {image_path}")
+        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        image = torch.from_numpy(image).float()
+        image = torch.div(image, 255)  # scale pixel values to [0, 1]
+        if not self.add_labels:
+            return self.augmentations(image) if self.augmentations is not None else image
+        label_path = self.labels[idx]
+        mask = cv2.imread(str(label_path), 0)  # flag 0 - read as grayscale
+        if mask is None:
+            raise FileNotFoundError(f"Could not read label: {label_path}")
+        if self.augmentations is not None:
+            image, mask = self.augmentations(image, mask)
+        return image, mask
diff --git a/innofw/core/models/torch/architectures/autoencoders/convolutional_ae.py b/innofw/core/models/torch/architectures/autoencoders/convolutional_ae.py
@@ -0,0 +1,20 @@
+import torch
+import torch.nn as nn
+from segmentation_models_pytorch import Unet
+
+
+class CAE(nn.Module):
+    """
+    Convolutional autoencoder built on top of a Unet.
+
+    The network outputs as many channels as it receives, passed through a
+    sigmoid, so the output can be compared with the input as a reconstruction.
+
+    Attributes
+    ----------
+    model : Unet
+        the underlying network
+    anomaly_threshold : float
+        stored on the model only; not used inside this class
+    """
+
+    def __init__(self, anomaly_threshold, input_channels=3):
+        super().__init__()
+        # in_channels has to follow input_channels too, otherwise the Unet
+        # keeps its default number of input channels
+        self.model = Unet(
+            in_channels=input_channels,
+            classes=input_channels,
+            activation='sigmoid',
+        )
+        self.anomaly_threshold = anomaly_threshold
+
+    def forward(self, x):
+        """Return the reconstruction of ``x`` produced by the Unet."""
+        return self.model(x)
+
+
+if __name__ == '__main__':
+    # Smoke check: push a batch of zeros through the model and print the output shape
+    dummy_batch = torch.zeros((10, 3, 512, 512))
+    autoencoder = CAE(0)
+    reconstruction = autoencoder(dummy_batch)
+    print(reconstruction.shape)
diff --git a/innofw/core/models/torch/lightning_modules/__init__.py b/innofw/core/models/torch/lightning_modules/__init__.py
@@ -1,6 +1,7 @@
 from .anomaly_detection_timeseries import (
     AnomalyDetectionTimeSeriesLightningModule,
 )
+from .anomaly_detection_images import AnomalyDetectionImagesLightningModule
 from .biobert_ner_model import BiobertNERModel
 from .chemistry_vae import ChemistryVAEForwardLightningModule
 from .chemistry_vae import ChemistryVAELightningModule

diff --git a/innofw/core/models/torch/lightning_modules/anomaly_detection_images.py b/innofw/core/models/torch/lightning_modules/anomaly_detection_images.py
@@ -0,0 +1,126 @@
+from typing import Any
+
+import torch
+from torchmetrics import MetricCollection
+from torchmetrics.classification import BinaryJaccardIndex, BinaryF1Score, BinaryPrecision, \
+    BinaryRecall
+from torchmetrics.regression import MeanAbsoluteError, MeanSquaredError
+from lovely_numpy import lo
+
+from innofw.core.models.torch.lightning_modules.base import BaseLightningModule
+
+
+class AnomalyDetectionImagesLightningModule(BaseLightningModule):
+    """
+    PyTorchLightning module for Anomaly Detection in Images
+
+    The model reconstructs its input; pixels whose squared reconstruction
+    error (summed over channels) reaches ``model.anomaly_threshold`` are
+    marked as anomalous.
+
+    Attributes
+    ----------
+    model : nn.Module
+        model to train, must expose ``anomaly_threshold``
+    losses : losses
+        stored as passed; the training loss itself is ``torch.nn.MSELoss``
+    optimizer_cfg : cfg
+        optimizer configurations
+    scheduler_cfg : cfg
+        scheduler configuration
+
+    Methods
+    -------
+    forward(x):
+        returns result of prediction
+    compute_anomaly_mask(x, x_rec=None):
+        returns the boolean anomaly mask for a batch
+    """
+
+    def __init__(
+            self,
+            model,
+            losses,
+            optimizer_cfg,
+            scheduler_cfg,
+            *args: Any,
+            **kwargs: Any,
+    ):
+        super().__init__(*args, **kwargs)
+        self.model = model
+        self.losses = losses
+        self.optimizer_cfg = optimizer_cfg
+        self.scheduler_cfg = scheduler_cfg
+
+        self.loss_fn = torch.nn.MSELoss()
+
+        # Training is scored on reconstruction quality
+        metrics = MetricCollection(
+            [MeanSquaredError(), MeanAbsoluteError()]
+        )
+
+        self.train_metrics = metrics.clone(prefix='train')
+        # Validation and test are scored on the predicted anomaly mask
+        segmentation_metrics = MetricCollection(
+            [
+                BinaryF1Score(),
+                BinaryPrecision(),
+                BinaryRecall(),
+                BinaryJaccardIndex(),
+            ]
+        )
+        self.val_metrics = segmentation_metrics.clone(prefix='val')
+        # Own prefix, so that test results are not logged under validation names
+        self.test_metrics = segmentation_metrics.clone(prefix='test')
+
+    def forward(self, x, *args, **kwargs) -> Any:
+        return self.model(x.float())
+
+    def training_step(self, x, batch_idx):
+        """Reconstruct the batch and return the MSE between input and reconstruction."""
+        x_rec = self.forward(x)
+        loss = self.loss_fn(x, x_rec)
+        metrics = self.compute_metrics('train', x_rec, x)
+        self.log_metrics('train', metrics)
+        self.log("train_loss", loss, on_step=False, on_epoch=True)
+        return {"loss": loss}
+
+    def validation_step(self, batch, batch_idx):
+        """Log the reconstruction loss and the mask metrics for an (image, mask) batch."""
+        x, y = batch
+        y = y.bool()
+        x_rec = self.forward(x)
+        loss = self.loss_fn(x, x_rec)
+        # Reuse the reconstruction instead of running the model a second time
+        mask = self.compute_anomaly_mask(x, x_rec)
+        metrics = self.compute_metrics('val', mask, y)
+        self.log_metrics('val', metrics)
+        self.log("val_loss", loss, on_step=False, on_epoch=True)
+        return {"loss": loss}
+
+    def test_step(self, batch, batch_idx):
+        """Same as ``validation_step`` but reported with the test metrics."""
+        x, y = batch
+        # Binarize the target exactly as validation_step does
+        y = y.bool()
+        x_rec = self.forward(x)
+        loss = self.loss_fn(x, x_rec)
+        mask = self.compute_anomaly_mask(x, x_rec)
+        metrics = self.compute_metrics('test', mask, y)
+        self.log_metrics('test', metrics)
+        self.log("test_loss", loss, on_step=False, on_epoch=True)
+        return {"loss": loss}
+
+    def predict_step(self, x, batch_idx, **kwargs):
+        """Return the input batch together with its anomaly mask."""
+        return (x, self.compute_anomaly_mask(x))
+
+    def compute_anomaly_mask(self, x, x_rec=None):
+        """
+        Threshold the per-pixel squared reconstruction error.
+
+        Parameters
+        ----------
+        x : input batch, channels in dim 1
+        x_rec : optional reconstruction of ``x``; computed with ``forward``
+            when it is not given
+
+        Returns
+        -------
+        boolean tensor without the channel dim, True where the error summed
+        over channels is >= ``self.model.anomaly_threshold``
+        """
+        if x_rec is None:
+            x_rec = self.forward(x)  # (B, C, W, H)
+        diff = ((x - x_rec) ** 2).sum(dim=1)  # sum across channels
+        mask = diff >= self.model.anomaly_threshold
+        return mask
+
+    def log_metrics(self, stage, metrics_res, *args, **kwargs):
+        for key, value in metrics_res.items():
+            self.log(key, value)  # , sync_dist=True
+
+    def compute_metrics(self, stage, predictions, labels):
+        """
+        Update and return the metric collection of the given stage.
+
+        Raises
+        ------
+        ValueError
+            if ``stage`` is not one of "train", "val", "test"
+        """
+        if stage == "train":
+            # Labels are the input images here and must stay as they are
+            return self.train_metrics(predictions, labels)
+
+        # Reshape mask labels from [B, 1, H, W] to [B, H, W]
+        if labels.dim() == 4 and labels.shape[1] == 1:
+            labels = labels.squeeze(1)
+            labels = labels.type(dtype=torch.long)
+
+        if stage == "val":
+            return self.val_metrics(predictions, labels)
+        if stage == "test":
+            return self.test_metrics(predictions, labels)
+        raise ValueError(f"Unknown stage: {stage}")
diff --git a/innofw/core/models/torch_adapter.py b/innofw/core/models/torch_adapter.py
@@ -161,7 +161,7 @@ def predict(self, datamodule, ckpt_path=None):
     def train(self, data_module, ckpt_path=None):
         self.trainer.fit(self.pl_module, data_module, ckpt_path=ckpt_path)
 
-    def test(self, data_module):
+    def test(self, data_module, ckpt_path=None):
+        """
+        Run the trainer's test loop for ``self.pl_module`` on ``data_module``.
+
+        ``ckpt_path`` is forwarded to the trainer, the same way ``train``
+        forwards it to ``fit``; with the default ``None`` nothing changes
+        compared to calling the trainer without it.
+        """
+        outputs = self.trainer.test(
+            self.pl_module, data_module, ckpt_path=ckpt_path
+        )
+        return outputs