From 6f5cd3a101915f4d8d9e5b30766b334f5b7b347d Mon Sep 17 00:00:00 2001 From: Cameron Mattson Date: Fri, 1 Nov 2024 11:13:36 -0600 Subject: [PATCH 1/2] Added dataset class, losses, and model performances with code --- 1.develop_vision_models/ImageMetaDataset.py | 71 +++ .../evaluate_vision_models.ipynb | 472 ++++++++++++++++++ .../model_manifest.csv | 5 + .../model_metrics/best_model_metrics.csv | 9 + .../nbconverted/evaluate_vision_models.py | 189 +++++++ .../losses/AbstractLoss.py | 13 + 1.develop_vision_models/losses/L1Loss.py | 21 + 1.develop_vision_models/losses/L2Loss.py | 21 + 1.develop_vision_models/losses/PSNR.py | 19 + 1.develop_vision_models/losses/SSIM.py | 28 ++ 10 files changed, 848 insertions(+) create mode 100644 1.develop_vision_models/ImageMetaDataset.py create mode 100644 1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb create mode 100644 1.develop_vision_models/evaluate_best_vision_models/model_manifest.csv create mode 100644 1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv create mode 100644 1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py create mode 100644 1.develop_vision_models/losses/AbstractLoss.py create mode 100644 1.develop_vision_models/losses/L1Loss.py create mode 100644 1.develop_vision_models/losses/L2Loss.py create mode 100644 1.develop_vision_models/losses/PSNR.py create mode 100644 1.develop_vision_models/losses/SSIM.py diff --git a/1.develop_vision_models/ImageMetaDataset.py b/1.develop_vision_models/ImageMetaDataset.py new file mode 100644 index 0000000..db35a0d --- /dev/null +++ b/1.develop_vision_models/ImageMetaDataset.py @@ -0,0 +1,71 @@ +import pathlib +from typing import Optional + +import numpy as np +import torch +from albumentations import ImageOnlyTransform +from PIL import Image +from torch.utils.data import Dataset + + +class ImageMetaDataset(Dataset): + """Iterable Image Dataset for Stained Images, which supports applying transformations to the inputs and targets""" + + def __init__( + self, + _input_dir: pathlib.Path, + _target_dir: pathlib.Path, + _input_transform: Optional[ImageOnlyTransform] = None, + _target_transform: Optional[ImageOnlyTransform] = None + ): + self.__input_dir = _input_dir + self.__target_dir = _target_dir + + # Retrieve all data from the specified directory + self.__image_path = list(self.__input_dir.glob('*')) + + self.__input_transform = _input_transform + self.__target_transform = _target_transform + + def __len__(self): + return len(self.__image_path) + + @property + def input_transform(self): + return self.__input_transform + + @property + def target_transform(self): + return self.__target_transform + + @property + def input_name(self): + if not self.__input_name: + raise ValueError("The input is not yet defined, so __input_name is not defined.") + return self.__input_name + + @property + def target_name(self): + if not self.__target_name: + raise ValueError("The target is not yet defined, so __target_name is not defined.") + return self.__target_name + + def __getitem__(self, _idx): + """Retrieve input and target image stain""" + + self.__input_name = self.__image_path[_idx].name + self.__target_name = str(self.__input_name).replace("CH0", "CH2").replace("dapi", "gold") + input_image = np.array(Image.open(self.__input_dir / self.__input_name).convert('I;16')) + target_image = np.array(Image.open(self.__target_dir / self.__target_name).convert('I;16')) + + if self.__input_transform: + input_image = 
self.__input_transform(image=input_image)["image"] + input_image = torch.from_numpy(input_image).unsqueeze(0).float() + + if self.__target_transform: + target_image = self.__target_transform(image=target_image)["image"] + target_image = torch.from_numpy(target_image).unsqueeze(0).float() + + return input_image, target_image, {"input_name": self.__input_name, + "target_name": self.__target_name + } diff --git a/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb b/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb new file mode 100644 index 0000000..929b6e5 --- /dev/null +++ b/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb @@ -0,0 +1,472 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "4AA77kNCKm" + }, + "source": [ + "# Evaluate Vision Models\n", + "Here, the best vision models, thus far, are evaluated according to four metrics on both the training and validation datasets:\n", + "L1 Loss, L2 Loss, PSNR, and SSIM" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:24.885320Z", + "iopub.status.busy": "2024-11-01T16:56:24.885224Z", + "iopub.status.idle": "2024-11-01T16:56:26.445305Z", + "shell.execute_reply": "2024-11-01T16:56:26.444935Z" + }, + "jukit_cell_id": "hiLBvSISaw" + }, + "outputs": [], + "source": [ + "import copy\n", + "import pathlib\n", + "import random\n", + "import sys\n", + "from collections import defaultdict\n", + "\n", + "import albumentations as A\n", + "import mlflow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "from torch.utils.data import random_split" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "Meri0joHnI" + }, + "source": [ + "## Find the root of the git repo on the host system" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.448179Z", + "iopub.status.busy": "2024-11-01T16:56:26.447779Z", + "iopub.status.idle": "2024-11-01T16:56:26.450695Z", + "shell.execute_reply": "2024-11-01T16:56:26.450447Z" + }, + "jukit_cell_id": "Ko16YFvQXV" + }, + "outputs": [], + "source": [ + "# Get the current working directory\n", + "cwd = pathlib.Path.cwd()\n", + "\n", + "if (cwd / \".git\").is_dir():\n", + " root_dir = cwd\n", + "\n", + "else:\n", + " root_dir = None\n", + " for parent in cwd.parents:\n", + " if (parent / \".git\").is_dir():\n", + " root_dir = parent\n", + " break\n", + "\n", + "# Check if a Git root directory was found\n", + "if root_dir is None:\n", + " raise FileNotFoundError(\"No Git root directory found.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "vGuEzAxPyF" + }, + "source": [ + "## Custom Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.452804Z", + "iopub.status.busy": "2024-11-01T16:56:26.452679Z", + "iopub.status.idle": "2024-11-01T16:56:26.465703Z", + "shell.execute_reply": "2024-11-01T16:56:26.465463Z" + }, + "jukit_cell_id": "RTGYNW7JLL" + }, + "outputs": [], + "source": [ + "sys.path.append(str((root_dir / \"1.develop_vision_models\").resolve(strict=True)))\n", + "sys.path.append(str((root_dir / \"1.develop_vision_models/losses\").resolve(strict=True)))\n", + "sys.path.append(str((root_dir / \"1.develop_vision_models/models\").resolve(strict=True)))\n", + 
"\n", + "from ImageDataset import ImageDataset\n", + "from L1Loss import L1Loss\n", + "from L2Loss import L2Loss\n", + "from PSNR import PSNR\n", + "from SSIM import SSIM\n", + "from transforms.CropNPixels import CropNPixels\n", + "from transforms.MinMaxNormalize import MinMaxNormalize" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "ZDjx5NhMh5" + }, + "source": [ + "## Set random seeds" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.467764Z", + "iopub.status.busy": "2024-11-01T16:56:26.467640Z", + "iopub.status.idle": "2024-11-01T16:56:26.498706Z", + "shell.execute_reply": "2024-11-01T16:56:26.498484Z" + }, + "jukit_cell_id": "goD9xIS6sd" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.seed(0)\n", + "np.random.seed(0)\n", + "torch.manual_seed(0)\n", + "\n", + "mlflow.log_param(\"random_seed\", 0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "dbCREcNBCC" + }, + "source": [ + "# Inputs" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.517905Z", + "iopub.status.busy": "2024-11-01T16:56:26.517780Z", + "iopub.status.idle": "2024-11-01T16:56:26.521598Z", + "shell.execute_reply": "2024-11-01T16:56:26.521352Z" + }, + "jukit_cell_id": "2hSr3QFUwp" + }, + "outputs": [], + "source": [ + "# Nuclei crops path of treated nuclei in the Dapi channel with all original pixel values\n", + "treated_dapi_crops = (root_dir / \"vision_nuclear_speckle_prediction/treated_nuclei_dapi_crops_same_background\").resolve(strict=True)\n", + "\n", + "# Nuclei crops path of nuclei in the Gold channel with all original pixel values\n", + "gold_crops = (root_dir / \"vision_nuclear_speckle_prediction/gold_cropped_nuclei_same_background\").resolve(strict=True)\n", + "\n", + "# Contains model metadata\n", + "model_manifestdf = pd.read_csv(\"model_manifest.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "SCnVToH5O1" + }, + "source": [ + "# Outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.522838Z", + "iopub.status.busy": "2024-11-01T16:56:26.522742Z", + "iopub.status.idle": "2024-11-01T16:56:26.524540Z", + "shell.execute_reply": "2024-11-01T16:56:26.524294Z" + }, + "jukit_cell_id": "tf1RlhXNwo" + }, + "outputs": [], + "source": [ + "metrics_path = pathlib.Path(\"model_metrics\")\n", + "metrics_path.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "5TaDZfsCpl" + }, + "source": [ + "# Evaluate Models" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.525590Z", + "iopub.status.busy": "2024-11-01T16:56:26.525497Z", + "iopub.status.idle": "2024-11-01T17:07:23.979777Z", + "shell.execute_reply": "2024-11-01T17:07:23.979283Z" + }, + "jukit_cell_id": "hOpgm7TTzF" + }, + "outputs": [], + "source": [ + "loss_funcs = {\n", + " \"l1_loss\": L1Loss(_metric_name=\"l1_loss\"),\n", + " \"l2_loss\": L2Loss(_metric_name=\"l2_loss\"),\n", + " \"psnr\": PSNR(_metric_name=\"psnr\"),\n", + " \"ssim\": SSIM(_metric_name=\"ssim\")\n", + "}\n", + "\n", + "losses = defaultdict(list)\n", + "device = 
torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "# Iterate through best models\n", + "for _, model_metadata in model_manifestdf.iterrows():\n", + "\n", + " if \"fnet\" in model_metadata[\"model_name\"]:\n", + "\n", + " input_transforms = A.Compose([\n", + " MinMaxNormalize(_normalization_factor=(2 ** 16) - 1, _always_apply=True),\n", + " CropNPixels(_pixel_count=1, _always_apply=True)\n", + " ])\n", + "\n", + " else:\n", + "\n", + " input_transforms = A.Compose([\n", + " MinMaxNormalize(_normalization_factor=(2 ** 16) - 1, _always_apply=True),\n", + " ])\n", + "\n", + " target_transforms = copy.deepcopy(input_transforms)\n", + "\n", + " img_dataset = ImageDataset(\n", + " _input_dir=treated_dapi_crops,\n", + " _target_dir=gold_crops,\n", + " _input_transform=input_transforms,\n", + " _target_transform=target_transforms\n", + " )\n", + "\n", + " # Same splitting procedure as in model trainers\n", + " train_size = int(0.7 * len(img_dataset))\n", + " val_size = int(0.15 * len(img_dataset))\n", + " test_size = len(img_dataset) - train_size - val_size\n", + " train_dataset, val_dataset, _ = random_split(img_dataset, [train_size, val_size, test_size])\n", + "\n", + " with torch.no_grad():\n", + "\n", + " generator_model = mlflow.pytorch.load_model(model_metadata[\"model_path\"]).eval().to(device)\n", + " val_metric_counts = defaultdict(float)\n", + " train_metric_counts = defaultdict(float)\n", + "\n", + " for input, target in val_dataset:\n", + " target = target.unsqueeze(0).to(device)\n", + " output = generator_model(input.unsqueeze(0).to(device))\n", + "\n", + " for loss_name, loss_func in loss_funcs.items():\n", + " val_metric_counts[loss_name] += loss_func(_generated_outputs=output, _targets=target)\n", + "\n", + " for input, target in train_dataset:\n", + " target = target.unsqueeze(0).to(device)\n", + " output = generator_model(input.unsqueeze(0).to(device))\n", + "\n", + " for loss_name, loss_func in loss_funcs.items():\n", + " train_metric_counts[loss_name] += loss_func(_generated_outputs=output, _targets=target)\n", + "\n", + " losses[\"model_name\"].append(model_metadata[\"model_name\"])\n", + " losses[\"model_name\"].append(model_metadata[\"model_name\"])\n", + " losses[\"datasplit\"].append(\"training\")\n", + " losses[\"datasplit\"].append(\"validation\")\n", + "\n", + " for loss_name, loss_func in loss_funcs.items():\n", + " losses[loss_name].append(train_metric_counts[loss_name].item() / len(train_dataset))\n", + " losses[loss_name].append(val_metric_counts[loss_name].item() / len(val_dataset))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T17:07:23.981844Z", + "iopub.status.busy": "2024-11-01T17:07:23.981693Z", + "iopub.status.idle": "2024-11-01T17:07:23.985134Z", + "shell.execute_reply": "2024-11-01T17:07:23.984807Z" + }, + "jukit_cell_id": "bwF17818lw" + }, + "outputs": [], + "source": [ + "lossdf = pd.DataFrame(losses)\n", + "lossdf.to_csv(metrics_path / \"best_model_metrics.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T17:07:23.986451Z", + "iopub.status.busy": "2024-11-01T17:07:23.986257Z", + "iopub.status.idle": "2024-11-01T17:07:23.991650Z", + "shell.execute_reply": "2024-11-01T17:07:23.991319Z" + }, + "jukit_cell_id": "qiKzCTm57t" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
model_namedatasplitl1_lossl2_losspsnrssim
0unconditional_pix2pix_unchanged_backgroundtraining0.0034310.00007344.8386431.648817
1unconditional_pix2pix_unchanged_backgroundvalidation0.0034280.00007244.8333851.656627
2fnet_unchanged_background_standard_scalartraining0.2602010.09239111.0975540.143274
3fnet_unchanged_background_standard_scalarvalidation0.2601210.09223511.1026060.141958
4fnet_unchanged_background_min_max_normalizedtraining0.0049810.00011842.2619891.600845
\n", + "
" + ], + "text/plain": [ + " model_name datasplit l1_loss \\\n", + "0 unconditional_pix2pix_unchanged_background training 0.003431 \n", + "1 unconditional_pix2pix_unchanged_background validation 0.003428 \n", + "2 fnet_unchanged_background_standard_scalar training 0.260201 \n", + "3 fnet_unchanged_background_standard_scalar validation 0.260121 \n", + "4 fnet_unchanged_background_min_max_normalized training 0.004981 \n", + "\n", + " l2_loss psnr ssim \n", + "0 0.000073 44.838643 1.648817 \n", + "1 0.000072 44.833385 1.656627 \n", + "2 0.092391 11.097554 0.143274 \n", + "3 0.092235 11.102606 0.141958 \n", + "4 0.000118 42.261989 1.600845 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lossdf.head()" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "python", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/1.develop_vision_models/evaluate_best_vision_models/model_manifest.csv b/1.develop_vision_models/evaluate_best_vision_models/model_manifest.csv new file mode 100644 index 0000000..32a7ef3 --- /dev/null +++ b/1.develop_vision_models/evaluate_best_vision_models/model_manifest.csv @@ -0,0 +1,5 @@ +trainer_file_name,trainer_class_name,model_name,model_path +Pix2PixTrainer.py,Pix2PixTrainer,unconditional_pix2pix_unchanged_background,file:///home/camo/projects/nuclear_speckles_analysis/mlruns/598485576074396081/d0b972a11464420eb1ca89205b9db0b9/artifacts/discriminator_model +Pix2PixTrainer.py,Pix2PixTrainer,fnet_unchanged_background_standard_scalar,file:///home/camo/projects/nuclear_speckles_analysis/mlruns/598485576074396081/35d00becb7d14716ae1248728ab0b260/artifacts/model +Pix2PixTrainer.py,Pix2PixTrainer,fnet_unchanged_background_min_max_normalized,file:///home/camo/projects/nuclear_speckles_analysis/mlruns/598485576074396081/1e9b1b0690874e568de3744e6ec5c4a5/artifacts/model +WGANGPPix2PixTrainer.py,WGANGPPix2PixTrainer,wgan_unchanged_background,file:///home/camo/projects/nuclear_speckles_analysis/mlruns/598485576074396081/6397b0ee108c4747af7990462377005b/artifacts/generator_model diff --git a/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv b/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv new file mode 100644 index 0000000..f07f34c --- /dev/null +++ b/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv @@ -0,0 +1,9 @@ +,model_name,datasplit,l1_loss,l2_loss,psnr,ssim +0,unconditional_pix2pix_unchanged_background,training,0.0034311376551529812,7.297120127036144e-05,44.838642699166016,1.6488170918191034 +1,unconditional_pix2pix_unchanged_background,validation,0.003428399594877756,7.225046131177918e-05,44.83338485109128,1.6566267236049887 +2,fnet_unchanged_background_standard_scalar,training,0.2602008551885108,0.09239136524004458,11.097554197576372,0.14327376210590126 +3,fnet_unchanged_background_standard_scalar,validation,0.26012092953714655,0.09223497499638816,11.10260573459843,0.14195789074259488 +4,fnet_unchanged_background_min_max_normalized,training,0.00498064472912281,0.00011830444583770664,42.26198854997005,1.6008448932751234 
+5,fnet_unchanged_background_min_max_normalized,validation,0.004959837722081085,0.00011675777697791311,42.24481238576497,1.6116350266436406 +6,wgan_unchanged_background,training,0.0034527258571323666,7.283252565255337e-05,44.85177452425932,1.6699889200225775 +7,wgan_unchanged_background,validation,0.0034577849859474116,7.054378570837176e-05,44.81343403935061,1.6787998686297172 diff --git a/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py b/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py new file mode 100644 index 0000000..45496a9 --- /dev/null +++ b/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python +# coding: utf-8 + +# # Evaluate Vision Models +# Here, the best vision models, thus far, are evaluated according to four metrics on both the training and validation datasets: +# L1 Loss, L2 Loss, PSNR, and SSIM + +# In[1]: + + +import copy +import pathlib +import random +import sys +from collections import defaultdict + +import albumentations as A +import mlflow +import numpy as np +import pandas as pd +import torch +from torch.utils.data import random_split + + +# ## Find the root of the git repo on the host system + +# In[2]: + + +# Get the current working directory +cwd = pathlib.Path.cwd() + +if (cwd / ".git").is_dir(): + root_dir = cwd + +else: + root_dir = None + for parent in cwd.parents: + if (parent / ".git").is_dir(): + root_dir = parent + break + +# Check if a Git root directory was found +if root_dir is None: + raise FileNotFoundError("No Git root directory found.") + + +# ## Custom Imports + +# In[3]: + + +sys.path.append(str((root_dir / "1.develop_vision_models").resolve(strict=True))) +sys.path.append(str((root_dir / "1.develop_vision_models/losses").resolve(strict=True))) +sys.path.append(str((root_dir / "1.develop_vision_models/models").resolve(strict=True))) + +from ImageDataset import ImageDataset +from L1Loss import L1Loss +from L2Loss import L2Loss +from PSNR import PSNR +from SSIM import SSIM +from transforms.CropNPixels import CropNPixels +from transforms.MinMaxNormalize import MinMaxNormalize + + +# ## Set random seeds + +# In[4]: + + +random.seed(0) +np.random.seed(0) +torch.manual_seed(0) + +mlflow.log_param("random_seed", 0) + + +# # Inputs + +# In[5]: + + +# Nuclei crops path of treated nuclei in the Dapi channel with all original pixel values +treated_dapi_crops = (root_dir / "vision_nuclear_speckle_prediction/treated_nuclei_dapi_crops_same_background").resolve(strict=True) + +# Nuclei crops path of nuclei in the Gold channel with all original pixel values +gold_crops = (root_dir / "vision_nuclear_speckle_prediction/gold_cropped_nuclei_same_background").resolve(strict=True) + +# Contains model metadata +model_manifestdf = pd.read_csv("model_manifest.csv") + + +# # Outputs + +# In[6]: + + +metrics_path = pathlib.Path("model_metrics") +metrics_path.mkdir(parents=True, exist_ok=True) + + +# # Evaluate Models + +# In[7]: + + +loss_funcs = { + "l1_loss": L1Loss(_metric_name="l1_loss"), + "l2_loss": L2Loss(_metric_name="l2_loss"), + "psnr": PSNR(_metric_name="psnr"), + "ssim": SSIM(_metric_name="ssim") +} + +losses = defaultdict(list) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Iterate through best models +for _, model_metadata in model_manifestdf.iterrows(): + + if "fnet" in model_metadata["model_name"]: + + input_transforms = A.Compose([ + 
MinMaxNormalize(_normalization_factor=(2 ** 16) - 1, _always_apply=True), + CropNPixels(_pixel_count=1, _always_apply=True) + ]) + + else: + + input_transforms = A.Compose([ + MinMaxNormalize(_normalization_factor=(2 ** 16) - 1, _always_apply=True), + ]) + + target_transforms = copy.deepcopy(input_transforms) + + img_dataset = ImageDataset( + _input_dir=treated_dapi_crops, + _target_dir=gold_crops, + _input_transform=input_transforms, + _target_transform=target_transforms + ) + + # Same splitting procedure as in model trainers + train_size = int(0.7 * len(img_dataset)) + val_size = int(0.15 * len(img_dataset)) + test_size = len(img_dataset) - train_size - val_size + train_dataset, val_dataset, _ = random_split(img_dataset, [train_size, val_size, test_size]) + + with torch.no_grad(): + + generator_model = mlflow.pytorch.load_model(model_metadata["model_path"]).eval().to(device) + val_metric_counts = defaultdict(float) + train_metric_counts = defaultdict(float) + + for input, target in val_dataset: + target = target.unsqueeze(0).to(device) + output = generator_model(input.unsqueeze(0).to(device)) + + for loss_name, loss_func in loss_funcs.items(): + val_metric_counts[loss_name] += loss_func(_generated_outputs=output, _targets=target) + + for input, target in train_dataset: + target = target.unsqueeze(0).to(device) + output = generator_model(input.unsqueeze(0).to(device)) + + for loss_name, loss_func in loss_funcs.items(): + train_metric_counts[loss_name] += loss_func(_generated_outputs=output, _targets=target) + + losses["model_name"].append(model_metadata["model_name"]) + losses["model_name"].append(model_metadata["model_name"]) + losses["datasplit"].append("training") + losses["datasplit"].append("validation") + + for loss_name, loss_func in loss_funcs.items(): + losses[loss_name].append(train_metric_counts[loss_name].item() / len(train_dataset)) + losses[loss_name].append(val_metric_counts[loss_name].item() / len(val_dataset)) + + +# In[8]: + + +lossdf = pd.DataFrame(losses) +lossdf.to_csv(metrics_path / "best_model_metrics.csv") + + +# In[9]: + + +lossdf.head() + diff --git a/1.develop_vision_models/losses/AbstractLoss.py b/1.develop_vision_models/losses/AbstractLoss.py new file mode 100644 index 0000000..bc82737 --- /dev/null +++ b/1.develop_vision_models/losses/AbstractLoss.py @@ -0,0 +1,13 @@ +import torch.nn as nn +from abc import ABC, abstractmethod + +class AbstractLoss(nn.Module, ABC): + + @property + @abstractmethod + def metric_name(self): + pass + + @abstractmethod + def forward(self, x): + pass diff --git a/1.develop_vision_models/losses/L1Loss.py b/1.develop_vision_models/losses/L1Loss.py new file mode 100644 index 0000000..2dba90c --- /dev/null +++ b/1.develop_vision_models/losses/L1Loss.py @@ -0,0 +1,21 @@ +import torch +from AbstractLoss import AbstractLoss + + +class L1Loss(AbstractLoss): + + def __init__( + self, + _metric_name + ): + super(L1Loss, self).__init__() + + self.__metric_func = torch.nn.L1Loss(reduction="mean") + self.__metric_name = _metric_name + + def forward(self, _generated_outputs: torch.Tensor, _targets: torch.Tensor): + return self.__metric_func(_generated_outputs, _targets) + + @property + def metric_name(self): + return self.__metric_name diff --git a/1.develop_vision_models/losses/L2Loss.py b/1.develop_vision_models/losses/L2Loss.py new file mode 100644 index 0000000..5a24463 --- /dev/null +++ b/1.develop_vision_models/losses/L2Loss.py @@ -0,0 +1,21 @@ +import torch +from AbstractLoss import AbstractLoss + + +class L2Loss(AbstractLoss): + + def 
__init__(
+        self,
+        _metric_name
+    ):
+        super(L2Loss, self).__init__()
+
+        self.__metric_func = torch.nn.MSELoss(reduction="mean")
+        self.__metric_name = _metric_name
+
+    def forward(self, _generated_outputs: torch.Tensor, _targets: torch.Tensor):
+        return self.__metric_func(_generated_outputs, _targets)
+
+    @property
+    def metric_name(self):
+        return self.__metric_name
diff --git a/1.develop_vision_models/losses/PSNR.py b/1.develop_vision_models/losses/PSNR.py
new file mode 100644
index 0000000..22f4fdc
--- /dev/null
+++ b/1.develop_vision_models/losses/PSNR.py
@@ -0,0 +1,19 @@
+import torch
+from AbstractLoss import AbstractLoss
+
+class PSNR(AbstractLoss):
+    def __init__(self, _metric_name, _max_pixel_value = 1):
+        super(PSNR, self).__init__()
+
+        self.__metric_name = _metric_name
+        self.__max_pixel_value = _max_pixel_value
+
+    def forward(self, _generated_outputs: torch.Tensor, _targets: torch.Tensor):
+        mse = torch.mean((_generated_outputs - _targets) ** 2, dim=[2, 3])
+        psnr = torch.where(mse == 0, torch.tensor(0.0), 10 * torch.log10((self.__max_pixel_value ** 2) / mse))
+
+        return psnr.mean()
+
+    @property
+    def metric_name(self):
+        return self.__metric_name
diff --git a/1.develop_vision_models/losses/SSIM.py b/1.develop_vision_models/losses/SSIM.py
new file mode 100644
index 0000000..b0bb55e
--- /dev/null
+++ b/1.develop_vision_models/losses/SSIM.py
@@ -0,0 +1,28 @@
+import torch
+from AbstractLoss import AbstractLoss
+
+class SSIM(AbstractLoss):
+    def __init__(self, _metric_name, _max_pixel_value = 1):
+        super(SSIM, self).__init__()
+
+        self.__metric_name = _metric_name
+        self.__max_pixel_value = _max_pixel_value
+
+    def forward(self, _generated_outputs: torch.Tensor, _targets: torch.Tensor):
+        mu1 = _generated_outputs.mean(dim=[2, 3], keepdim=True)
+        mu2 = _targets.mean(dim=[2, 3], keepdim=True)
+
+        sigma1_sq = ((_generated_outputs - mu1) ** 2).mean(dim=[2, 3], keepdim=True)
+        sigma2_sq = ((_targets - mu2) ** 2).mean(dim=[2, 3], keepdim=True)
+        sigma12 = ((_generated_outputs - mu1) * (_targets - mu2)).mean(dim=[2, 3], keepdim=True)
+
+        C1 = (self.__max_pixel_value * 0.01) ** 2
+        C2 = (self.__max_pixel_value * 0.03) ** 2
+
+        ssim_value = ((2 * mu1 * mu2 + C1) * (2 * sigma12 + C2)) / ((mu1 ** 2 + mu2 ** 2 + C1) * (sigma1_sq + sigma2_sq + C2))
+
+        return ssim_value.mean()
+
+    @property
+    def metric_name(self):
+        return self.__metric_name

From 43a25d8acac97deff3bbea7521a85e0208eae8aa Mon Sep 17 00:00:00 2001
From: Cameron Mattson
Date: Fri, 1 Nov 2024 13:01:50 -0600
Subject: [PATCH 2/2] Removed indices for metric results

---
 .../evaluate_vision_models.ipynb          | 74 +++++++++----------
 .../model_metrics/best_model_metrics.csv  | 18 ++---
 .../nbconverted/evaluate_vision_models.py |  2 +-
 3 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb b/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb
index 929b6e5..44a17ff 100644
--- a/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb
+++ b/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb
@@ -16,10 +16,10 @@
     "execution_count": 1,
     "metadata": {
      "execution": {
-      "iopub.execute_input": "2024-11-01T16:56:24.885320Z",
-      "iopub.status.busy": "2024-11-01T16:56:24.885224Z",
-      "iopub.status.idle": "2024-11-01T16:56:26.445305Z",
-      "shell.execute_reply": "2024-11-01T16:56:26.444935Z"
+      "iopub.execute_input": "2024-11-01T17:18:14.073278Z",
+      "iopub.status.busy": 
"2024-11-01T17:18:14.073182Z", + "iopub.status.idle": "2024-11-01T17:18:15.635993Z", + "shell.execute_reply": "2024-11-01T17:18:15.635626Z" }, "jukit_cell_id": "hiLBvSISaw" }, @@ -53,10 +53,10 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.448179Z", - "iopub.status.busy": "2024-11-01T16:56:26.447779Z", - "iopub.status.idle": "2024-11-01T16:56:26.450695Z", - "shell.execute_reply": "2024-11-01T16:56:26.450447Z" + "iopub.execute_input": "2024-11-01T17:18:15.639215Z", + "iopub.status.busy": "2024-11-01T17:18:15.638823Z", + "iopub.status.idle": "2024-11-01T17:18:15.641853Z", + "shell.execute_reply": "2024-11-01T17:18:15.641539Z" }, "jukit_cell_id": "Ko16YFvQXV" }, @@ -94,10 +94,10 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.452804Z", - "iopub.status.busy": "2024-11-01T16:56:26.452679Z", - "iopub.status.idle": "2024-11-01T16:56:26.465703Z", - "shell.execute_reply": "2024-11-01T16:56:26.465463Z" + "iopub.execute_input": "2024-11-01T17:18:15.643983Z", + "iopub.status.busy": "2024-11-01T17:18:15.643856Z", + "iopub.status.idle": "2024-11-01T17:18:15.657361Z", + "shell.execute_reply": "2024-11-01T17:18:15.657048Z" }, "jukit_cell_id": "RTGYNW7JLL" }, @@ -130,10 +130,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.467764Z", - "iopub.status.busy": "2024-11-01T16:56:26.467640Z", - "iopub.status.idle": "2024-11-01T16:56:26.498706Z", - "shell.execute_reply": "2024-11-01T16:56:26.498484Z" + "iopub.execute_input": "2024-11-01T17:18:15.659320Z", + "iopub.status.busy": "2024-11-01T17:18:15.659222Z", + "iopub.status.idle": "2024-11-01T17:18:15.690180Z", + "shell.execute_reply": "2024-11-01T17:18:15.689807Z" }, "jukit_cell_id": "goD9xIS6sd" }, @@ -171,10 +171,10 @@ "execution_count": 5, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.517905Z", - "iopub.status.busy": "2024-11-01T16:56:26.517780Z", - "iopub.status.idle": "2024-11-01T16:56:26.521598Z", - "shell.execute_reply": "2024-11-01T16:56:26.521352Z" + "iopub.execute_input": "2024-11-01T17:18:15.709328Z", + "iopub.status.busy": "2024-11-01T17:18:15.709191Z", + "iopub.status.idle": "2024-11-01T17:18:15.713552Z", + "shell.execute_reply": "2024-11-01T17:18:15.713245Z" }, "jukit_cell_id": "2hSr3QFUwp" }, @@ -204,10 +204,10 @@ "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.522838Z", - "iopub.status.busy": "2024-11-01T16:56:26.522742Z", - "iopub.status.idle": "2024-11-01T16:56:26.524540Z", - "shell.execute_reply": "2024-11-01T16:56:26.524294Z" + "iopub.execute_input": "2024-11-01T17:18:15.714917Z", + "iopub.status.busy": "2024-11-01T17:18:15.714710Z", + "iopub.status.idle": "2024-11-01T17:18:15.716579Z", + "shell.execute_reply": "2024-11-01T17:18:15.716275Z" }, "jukit_cell_id": "tf1RlhXNwo" }, @@ -231,10 +231,10 @@ "execution_count": 7, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.525590Z", - "iopub.status.busy": "2024-11-01T16:56:26.525497Z", - "iopub.status.idle": "2024-11-01T17:07:23.979777Z", - "shell.execute_reply": "2024-11-01T17:07:23.979283Z" + "iopub.execute_input": "2024-11-01T17:18:15.717713Z", + "iopub.status.busy": "2024-11-01T17:18:15.717557Z", + "iopub.status.idle": "2024-11-01T17:29:20.709704Z", + "shell.execute_reply": "2024-11-01T17:29:20.709321Z" }, "jukit_cell_id": "hOpgm7TTzF" }, @@ -316,17 +316,17 @@ "execution_count": 8, "metadata": { "execution": { - "iopub.execute_input": 
"2024-11-01T17:07:23.981844Z", - "iopub.status.busy": "2024-11-01T17:07:23.981693Z", - "iopub.status.idle": "2024-11-01T17:07:23.985134Z", - "shell.execute_reply": "2024-11-01T17:07:23.984807Z" + "iopub.execute_input": "2024-11-01T17:29:20.712038Z", + "iopub.status.busy": "2024-11-01T17:29:20.711891Z", + "iopub.status.idle": "2024-11-01T17:29:20.715391Z", + "shell.execute_reply": "2024-11-01T17:29:20.715132Z" }, "jukit_cell_id": "bwF17818lw" }, "outputs": [], "source": [ "lossdf = pd.DataFrame(losses)\n", - "lossdf.to_csv(metrics_path / \"best_model_metrics.csv\")" + "lossdf.to_csv(metrics_path / \"best_model_metrics.csv\", index=False)" ] }, { @@ -334,10 +334,10 @@ "execution_count": 9, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T17:07:23.986451Z", - "iopub.status.busy": "2024-11-01T17:07:23.986257Z", - "iopub.status.idle": "2024-11-01T17:07:23.991650Z", - "shell.execute_reply": "2024-11-01T17:07:23.991319Z" + "iopub.execute_input": "2024-11-01T17:29:20.717172Z", + "iopub.status.busy": "2024-11-01T17:29:20.717048Z", + "iopub.status.idle": "2024-11-01T17:29:20.722600Z", + "shell.execute_reply": "2024-11-01T17:29:20.722359Z" }, "jukit_cell_id": "qiKzCTm57t" }, diff --git a/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv b/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv index f07f34c..4d9e146 100644 --- a/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv +++ b/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv @@ -1,9 +1,9 @@ -,model_name,datasplit,l1_loss,l2_loss,psnr,ssim -0,unconditional_pix2pix_unchanged_background,training,0.0034311376551529812,7.297120127036144e-05,44.838642699166016,1.6488170918191034 -1,unconditional_pix2pix_unchanged_background,validation,0.003428399594877756,7.225046131177918e-05,44.83338485109128,1.6566267236049887 -2,fnet_unchanged_background_standard_scalar,training,0.2602008551885108,0.09239136524004458,11.097554197576372,0.14327376210590126 -3,fnet_unchanged_background_standard_scalar,validation,0.26012092953714655,0.09223497499638816,11.10260573459843,0.14195789074259488 -4,fnet_unchanged_background_min_max_normalized,training,0.00498064472912281,0.00011830444583770664,42.26198854997005,1.6008448932751234 -5,fnet_unchanged_background_min_max_normalized,validation,0.004959837722081085,0.00011675777697791311,42.24481238576497,1.6116350266436406 -6,wgan_unchanged_background,training,0.0034527258571323666,7.283252565255337e-05,44.85177452425932,1.6699889200225775 -7,wgan_unchanged_background,validation,0.0034577849859474116,7.054378570837176e-05,44.81343403935061,1.6787998686297172 +model_name,datasplit,l1_loss,l2_loss,psnr,ssim +unconditional_pix2pix_unchanged_background,training,0.0034311376551529812,7.297120127036144e-05,44.838642699166016,1.6488170918191034 +unconditional_pix2pix_unchanged_background,validation,0.003428399594877756,7.225046131177918e-05,44.83338485109128,1.6566267236049887 +fnet_unchanged_background_standard_scalar,training,0.2602008551885108,0.09239136524004458,11.097554197576372,0.14327376210590126 +fnet_unchanged_background_standard_scalar,validation,0.26012092953714655,0.09223497499638816,11.10260573459843,0.14195789074259488 +fnet_unchanged_background_min_max_normalized,training,0.00498064472912281,0.00011830444583770664,42.26198854997005,1.6008448932751234 
+fnet_unchanged_background_min_max_normalized,validation,0.004959837722081085,0.00011675777697791311,42.24481238576497,1.6116350266436406 +wgan_unchanged_background,training,0.0034527258571323666,7.283252565255337e-05,44.85177452425932,1.6699889200225775 +wgan_unchanged_background,validation,0.0034577849859474116,7.054378570837176e-05,44.81343403935061,1.6787998686297172 diff --git a/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py b/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py index 45496a9..a041301 100644 --- a/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py +++ b/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py @@ -179,7 +179,7 @@ lossdf = pd.DataFrame(losses) -lossdf.to_csv(metrics_path / "best_model_metrics.csv") +lossdf.to_csv(metrics_path / "best_model_metrics.csv", index=False) # In[9]: