From 6f5cd3a101915f4d8d9e5b30766b334f5b7b347d Mon Sep 17 00:00:00 2001 From: Cameron Mattson Date: Fri, 1 Nov 2024 11:13:36 -0600 Subject: [PATCH 1/2] Added dataset class, losses, and model performances with code --- 1.develop_vision_models/ImageMetaDataset.py | 71 +++ .../evaluate_vision_models.ipynb | 472 ++++++++++++++++++ .../model_manifest.csv | 5 + .../model_metrics/best_model_metrics.csv | 9 + .../nbconverted/evaluate_vision_models.py | 189 +++++++ .../losses/AbstractLoss.py | 13 + 1.develop_vision_models/losses/L1Loss.py | 21 + 1.develop_vision_models/losses/L2Loss.py | 21 + 1.develop_vision_models/losses/PSNR.py | 19 + 1.develop_vision_models/losses/SSIM.py | 28 ++ 10 files changed, 848 insertions(+) create mode 100644 1.develop_vision_models/ImageMetaDataset.py create mode 100644 1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb create mode 100644 1.develop_vision_models/evaluate_best_vision_models/model_manifest.csv create mode 100644 1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv create mode 100644 1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py create mode 100644 1.develop_vision_models/losses/AbstractLoss.py create mode 100644 1.develop_vision_models/losses/L1Loss.py create mode 100644 1.develop_vision_models/losses/L2Loss.py create mode 100644 1.develop_vision_models/losses/PSNR.py create mode 100644 1.develop_vision_models/losses/SSIM.py diff --git a/1.develop_vision_models/ImageMetaDataset.py b/1.develop_vision_models/ImageMetaDataset.py new file mode 100644 index 0000000..db35a0d --- /dev/null +++ b/1.develop_vision_models/ImageMetaDataset.py @@ -0,0 +1,71 @@ +import pathlib +from typing import Optional + +import numpy as np +import torch +from albumentations import ImageOnlyTransform +from PIL import Image +from torch.utils.data import Dataset + + +class ImageMetaDataset(Dataset): + """Iterable Image Dataset for Stained Images, which supports applying transformations to the inputs and targets""" + + def __init__( + self, + _input_dir: pathlib.Path, + _target_dir: pathlib.Path, + _input_transform: Optional[ImageOnlyTransform] = None, + _target_transform: Optional[ImageOnlyTransform] = None + ): + self.__input_dir = _input_dir + self.__target_dir = _target_dir + + # Retrieve all data from the specified directory + self.__image_path = list(self.__input_dir.glob('*')) + + self.__input_transform = _input_transform + self.__target_transform = _target_transform + + def __len__(self): + return len(self.__image_path) + + @property + def input_transform(self): + return self.__input_transform + + @property + def target_transform(self): + return self.__target_transform + + @property + def input_name(self): + if not self.__input_name: + raise ValueError("The input is not yet defined, so __input_name is not defined.") + return self.__input_name + + @property + def target_name(self): + if not self.__target_name: + raise ValueError("The target is not yet defined, so __target_name is not defined.") + return self.__target_name + + def __getitem__(self, _idx): + """Retrieve input and target image stain""" + + self.__input_name = self.__image_path[_idx].name + self.__target_name = str(self.__input_name).replace("CH0", "CH2").replace("dapi", "gold") + input_image = np.array(Image.open(self.__input_dir / self.__input_name).convert('I;16')) + target_image = np.array(Image.open(self.__target_dir / self.__target_name).convert('I;16')) + + if self.__input_transform: + input_image = 
self.__input_transform(image=input_image)["image"] + input_image = torch.from_numpy(input_image).unsqueeze(0).float() + + if self.__target_transform: + target_image = self.__target_transform(image=target_image)["image"] + target_image = torch.from_numpy(target_image).unsqueeze(0).float() + + return input_image, target_image, {"input_name": self.__input_name, + "target_name": self.__target_name + } diff --git a/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb b/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb new file mode 100644 index 0000000..929b6e5 --- /dev/null +++ b/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb @@ -0,0 +1,472 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "4AA77kNCKm" + }, + "source": [ + "# Evaluate Vision Models\n", + "Here, the best vision models, thus far, are evaluated according to four metrics on both the training and validation datasets:\n", + "L1 Loss, L2 Loss, PSNR, and SSIM" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:24.885320Z", + "iopub.status.busy": "2024-11-01T16:56:24.885224Z", + "iopub.status.idle": "2024-11-01T16:56:26.445305Z", + "shell.execute_reply": "2024-11-01T16:56:26.444935Z" + }, + "jukit_cell_id": "hiLBvSISaw" + }, + "outputs": [], + "source": [ + "import copy\n", + "import pathlib\n", + "import random\n", + "import sys\n", + "from collections import defaultdict\n", + "\n", + "import albumentations as A\n", + "import mlflow\n", + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "from torch.utils.data import random_split" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "Meri0joHnI" + }, + "source": [ + "## Find the root of the git repo on the host system" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.448179Z", + "iopub.status.busy": "2024-11-01T16:56:26.447779Z", + "iopub.status.idle": "2024-11-01T16:56:26.450695Z", + "shell.execute_reply": "2024-11-01T16:56:26.450447Z" + }, + "jukit_cell_id": "Ko16YFvQXV" + }, + "outputs": [], + "source": [ + "# Get the current working directory\n", + "cwd = pathlib.Path.cwd()\n", + "\n", + "if (cwd / \".git\").is_dir():\n", + " root_dir = cwd\n", + "\n", + "else:\n", + " root_dir = None\n", + " for parent in cwd.parents:\n", + " if (parent / \".git\").is_dir():\n", + " root_dir = parent\n", + " break\n", + "\n", + "# Check if a Git root directory was found\n", + "if root_dir is None:\n", + " raise FileNotFoundError(\"No Git root directory found.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "vGuEzAxPyF" + }, + "source": [ + "## Custom Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.452804Z", + "iopub.status.busy": "2024-11-01T16:56:26.452679Z", + "iopub.status.idle": "2024-11-01T16:56:26.465703Z", + "shell.execute_reply": "2024-11-01T16:56:26.465463Z" + }, + "jukit_cell_id": "RTGYNW7JLL" + }, + "outputs": [], + "source": [ + "sys.path.append(str((root_dir / \"1.develop_vision_models\").resolve(strict=True)))\n", + "sys.path.append(str((root_dir / \"1.develop_vision_models/losses\").resolve(strict=True)))\n", + "sys.path.append(str((root_dir / \"1.develop_vision_models/models\").resolve(strict=True)))\n", + 
"\n", + "from ImageDataset import ImageDataset\n", + "from L1Loss import L1Loss\n", + "from L2Loss import L2Loss\n", + "from PSNR import PSNR\n", + "from SSIM import SSIM\n", + "from transforms.CropNPixels import CropNPixels\n", + "from transforms.MinMaxNormalize import MinMaxNormalize" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "ZDjx5NhMh5" + }, + "source": [ + "## Set random seeds" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.467764Z", + "iopub.status.busy": "2024-11-01T16:56:26.467640Z", + "iopub.status.idle": "2024-11-01T16:56:26.498706Z", + "shell.execute_reply": "2024-11-01T16:56:26.498484Z" + }, + "jukit_cell_id": "goD9xIS6sd" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "random.seed(0)\n", + "np.random.seed(0)\n", + "torch.manual_seed(0)\n", + "\n", + "mlflow.log_param(\"random_seed\", 0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "dbCREcNBCC" + }, + "source": [ + "# Inputs" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.517905Z", + "iopub.status.busy": "2024-11-01T16:56:26.517780Z", + "iopub.status.idle": "2024-11-01T16:56:26.521598Z", + "shell.execute_reply": "2024-11-01T16:56:26.521352Z" + }, + "jukit_cell_id": "2hSr3QFUwp" + }, + "outputs": [], + "source": [ + "# Nuclei crops path of treated nuclei in the Dapi channel with all original pixel values\n", + "treated_dapi_crops = (root_dir / \"vision_nuclear_speckle_prediction/treated_nuclei_dapi_crops_same_background\").resolve(strict=True)\n", + "\n", + "# Nuclei crops path of nuclei in the Gold channel with all original pixel values\n", + "gold_crops = (root_dir / \"vision_nuclear_speckle_prediction/gold_cropped_nuclei_same_background\").resolve(strict=True)\n", + "\n", + "# Contains model metadata\n", + "model_manifestdf = pd.read_csv(\"model_manifest.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "SCnVToH5O1" + }, + "source": [ + "# Outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.522838Z", + "iopub.status.busy": "2024-11-01T16:56:26.522742Z", + "iopub.status.idle": "2024-11-01T16:56:26.524540Z", + "shell.execute_reply": "2024-11-01T16:56:26.524294Z" + }, + "jukit_cell_id": "tf1RlhXNwo" + }, + "outputs": [], + "source": [ + "metrics_path = pathlib.Path(\"model_metrics\")\n", + "metrics_path.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "jukit_cell_id": "5TaDZfsCpl" + }, + "source": [ + "# Evaluate Models" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T16:56:26.525590Z", + "iopub.status.busy": "2024-11-01T16:56:26.525497Z", + "iopub.status.idle": "2024-11-01T17:07:23.979777Z", + "shell.execute_reply": "2024-11-01T17:07:23.979283Z" + }, + "jukit_cell_id": "hOpgm7TTzF" + }, + "outputs": [], + "source": [ + "loss_funcs = {\n", + " \"l1_loss\": L1Loss(_metric_name=\"l1_loss\"),\n", + " \"l2_loss\": L2Loss(_metric_name=\"l2_loss\"),\n", + " \"psnr\": PSNR(_metric_name=\"psnr\"),\n", + " \"ssim\": SSIM(_metric_name=\"ssim\")\n", + "}\n", + "\n", + "losses = defaultdict(list)\n", + "device = 
torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "\n", + "# Iterate through best models\n", + "for _, model_metadata in model_manifestdf.iterrows():\n", + "\n", + " if \"fnet\" in model_metadata[\"model_name\"]:\n", + "\n", + " input_transforms = A.Compose([\n", + " MinMaxNormalize(_normalization_factor=(2 ** 16) - 1, _always_apply=True),\n", + " CropNPixels(_pixel_count=1, _always_apply=True)\n", + " ])\n", + "\n", + " else:\n", + "\n", + " input_transforms = A.Compose([\n", + " MinMaxNormalize(_normalization_factor=(2 ** 16) - 1, _always_apply=True),\n", + " ])\n", + "\n", + " target_transforms = copy.deepcopy(input_transforms)\n", + "\n", + " img_dataset = ImageDataset(\n", + " _input_dir=treated_dapi_crops,\n", + " _target_dir=gold_crops,\n", + " _input_transform=input_transforms,\n", + " _target_transform=target_transforms\n", + " )\n", + "\n", + " # Same splitting procedure as in model trainers\n", + " train_size = int(0.7 * len(img_dataset))\n", + " val_size = int(0.15 * len(img_dataset))\n", + " test_size = len(img_dataset) - train_size - val_size\n", + " train_dataset, val_dataset, _ = random_split(img_dataset, [train_size, val_size, test_size])\n", + "\n", + " with torch.no_grad():\n", + "\n", + " generator_model = mlflow.pytorch.load_model(model_metadata[\"model_path\"]).eval().to(device)\n", + " val_metric_counts = defaultdict(float)\n", + " train_metric_counts = defaultdict(float)\n", + "\n", + " for input, target in val_dataset:\n", + " target = target.unsqueeze(0).to(device)\n", + " output = generator_model(input.unsqueeze(0).to(device))\n", + "\n", + " for loss_name, loss_func in loss_funcs.items():\n", + " val_metric_counts[loss_name] += loss_func(_generated_outputs=output, _targets=target)\n", + "\n", + " for input, target in train_dataset:\n", + " target = target.unsqueeze(0).to(device)\n", + " output = generator_model(input.unsqueeze(0).to(device))\n", + "\n", + " for loss_name, loss_func in loss_funcs.items():\n", + " train_metric_counts[loss_name] += loss_func(_generated_outputs=output, _targets=target)\n", + "\n", + " losses[\"model_name\"].append(model_metadata[\"model_name\"])\n", + " losses[\"model_name\"].append(model_metadata[\"model_name\"])\n", + " losses[\"datasplit\"].append(\"training\")\n", + " losses[\"datasplit\"].append(\"validation\")\n", + "\n", + " for loss_name, loss_func in loss_funcs.items():\n", + " losses[loss_name].append(train_metric_counts[loss_name].item() / len(train_dataset))\n", + " losses[loss_name].append(val_metric_counts[loss_name].item() / len(val_dataset))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T17:07:23.981844Z", + "iopub.status.busy": "2024-11-01T17:07:23.981693Z", + "iopub.status.idle": "2024-11-01T17:07:23.985134Z", + "shell.execute_reply": "2024-11-01T17:07:23.984807Z" + }, + "jukit_cell_id": "bwF17818lw" + }, + "outputs": [], + "source": [ + "lossdf = pd.DataFrame(losses)\n", + "lossdf.to_csv(metrics_path / \"best_model_metrics.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-01T17:07:23.986451Z", + "iopub.status.busy": "2024-11-01T17:07:23.986257Z", + "iopub.status.idle": "2024-11-01T17:07:23.991650Z", + "shell.execute_reply": "2024-11-01T17:07:23.991319Z" + }, + "jukit_cell_id": "qiKzCTm57t" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
model_namedatasplitl1_lossl2_losspsnrssim
0unconditional_pix2pix_unchanged_backgroundtraining0.0034310.00007344.8386431.648817
1unconditional_pix2pix_unchanged_backgroundvalidation0.0034280.00007244.8333851.656627
2fnet_unchanged_background_standard_scalartraining0.2602010.09239111.0975540.143274
3fnet_unchanged_background_standard_scalarvalidation0.2601210.09223511.1026060.141958
4fnet_unchanged_background_min_max_normalizedtraining0.0049810.00011842.2619891.600845
\n", + "
" + ], + "text/plain": [ + " model_name datasplit l1_loss \\\n", + "0 unconditional_pix2pix_unchanged_background training 0.003431 \n", + "1 unconditional_pix2pix_unchanged_background validation 0.003428 \n", + "2 fnet_unchanged_background_standard_scalar training 0.260201 \n", + "3 fnet_unchanged_background_standard_scalar validation 0.260121 \n", + "4 fnet_unchanged_background_min_max_normalized training 0.004981 \n", + "\n", + " l2_loss psnr ssim \n", + "0 0.000073 44.838643 1.648817 \n", + "1 0.000072 44.833385 1.656627 \n", + "2 0.092391 11.097554 0.143274 \n", + "3 0.092235 11.102606 0.141958 \n", + "4 0.000118 42.261989 1.600845 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lossdf.head()" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "python", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.10" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/1.develop_vision_models/evaluate_best_vision_models/model_manifest.csv b/1.develop_vision_models/evaluate_best_vision_models/model_manifest.csv new file mode 100644 index 0000000..32a7ef3 --- /dev/null +++ b/1.develop_vision_models/evaluate_best_vision_models/model_manifest.csv @@ -0,0 +1,5 @@ +trainer_file_name,trainer_class_name,model_name,model_path +Pix2PixTrainer.py,Pix2PixTrainer,unconditional_pix2pix_unchanged_background,file:///home/camo/projects/nuclear_speckles_analysis/mlruns/598485576074396081/d0b972a11464420eb1ca89205b9db0b9/artifacts/discriminator_model +Pix2PixTrainer.py,Pix2PixTrainer,fnet_unchanged_background_standard_scalar,file:///home/camo/projects/nuclear_speckles_analysis/mlruns/598485576074396081/35d00becb7d14716ae1248728ab0b260/artifacts/model +Pix2PixTrainer.py,Pix2PixTrainer,fnet_unchanged_background_min_max_normalized,file:///home/camo/projects/nuclear_speckles_analysis/mlruns/598485576074396081/1e9b1b0690874e568de3744e6ec5c4a5/artifacts/model +WGANGPPix2PixTrainer.py,WGANGPPix2PixTrainer,wgan_unchanged_background,file:///home/camo/projects/nuclear_speckles_analysis/mlruns/598485576074396081/6397b0ee108c4747af7990462377005b/artifacts/generator_model diff --git a/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv b/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv new file mode 100644 index 0000000..f07f34c --- /dev/null +++ b/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv @@ -0,0 +1,9 @@ +,model_name,datasplit,l1_loss,l2_loss,psnr,ssim +0,unconditional_pix2pix_unchanged_background,training,0.0034311376551529812,7.297120127036144e-05,44.838642699166016,1.6488170918191034 +1,unconditional_pix2pix_unchanged_background,validation,0.003428399594877756,7.225046131177918e-05,44.83338485109128,1.6566267236049887 +2,fnet_unchanged_background_standard_scalar,training,0.2602008551885108,0.09239136524004458,11.097554197576372,0.14327376210590126 +3,fnet_unchanged_background_standard_scalar,validation,0.26012092953714655,0.09223497499638816,11.10260573459843,0.14195789074259488 +4,fnet_unchanged_background_min_max_normalized,training,0.00498064472912281,0.00011830444583770664,42.26198854997005,1.6008448932751234 
+5,fnet_unchanged_background_min_max_normalized,validation,0.004959837722081085,0.00011675777697791311,42.24481238576497,1.6116350266436406 +6,wgan_unchanged_background,training,0.0034527258571323666,7.283252565255337e-05,44.85177452425932,1.6699889200225775 +7,wgan_unchanged_background,validation,0.0034577849859474116,7.054378570837176e-05,44.81343403935061,1.6787998686297172 diff --git a/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py b/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py new file mode 100644 index 0000000..45496a9 --- /dev/null +++ b/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python +# coding: utf-8 + +# # Evaluate Vision Models +# Here, the best vision models, thus far, are evaluated according to four metrics on both the training and validation datasets: +# L1 Loss, L2 Loss, PSNR, and SSIM + +# In[1]: + + +import copy +import pathlib +import random +import sys +from collections import defaultdict + +import albumentations as A +import mlflow +import numpy as np +import pandas as pd +import torch +from torch.utils.data import random_split + + +# ## Find the root of the git repo on the host system + +# In[2]: + + +# Get the current working directory +cwd = pathlib.Path.cwd() + +if (cwd / ".git").is_dir(): + root_dir = cwd + +else: + root_dir = None + for parent in cwd.parents: + if (parent / ".git").is_dir(): + root_dir = parent + break + +# Check if a Git root directory was found +if root_dir is None: + raise FileNotFoundError("No Git root directory found.") + + +# ## Custom Imports + +# In[3]: + + +sys.path.append(str((root_dir / "1.develop_vision_models").resolve(strict=True))) +sys.path.append(str((root_dir / "1.develop_vision_models/losses").resolve(strict=True))) +sys.path.append(str((root_dir / "1.develop_vision_models/models").resolve(strict=True))) + +from ImageDataset import ImageDataset +from L1Loss import L1Loss +from L2Loss import L2Loss +from PSNR import PSNR +from SSIM import SSIM +from transforms.CropNPixels import CropNPixels +from transforms.MinMaxNormalize import MinMaxNormalize + + +# ## Set random seeds + +# In[4]: + + +random.seed(0) +np.random.seed(0) +torch.manual_seed(0) + +mlflow.log_param("random_seed", 0) + + +# # Inputs + +# In[5]: + + +# Nuclei crops path of treated nuclei in the Dapi channel with all original pixel values +treated_dapi_crops = (root_dir / "vision_nuclear_speckle_prediction/treated_nuclei_dapi_crops_same_background").resolve(strict=True) + +# Nuclei crops path of nuclei in the Gold channel with all original pixel values +gold_crops = (root_dir / "vision_nuclear_speckle_prediction/gold_cropped_nuclei_same_background").resolve(strict=True) + +# Contains model metadata +model_manifestdf = pd.read_csv("model_manifest.csv") + + +# # Outputs + +# In[6]: + + +metrics_path = pathlib.Path("model_metrics") +metrics_path.mkdir(parents=True, exist_ok=True) + + +# # Evaluate Models + +# In[7]: + + +loss_funcs = { + "l1_loss": L1Loss(_metric_name="l1_loss"), + "l2_loss": L2Loss(_metric_name="l2_loss"), + "psnr": PSNR(_metric_name="psnr"), + "ssim": SSIM(_metric_name="ssim") +} + +losses = defaultdict(list) +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Iterate through best models +for _, model_metadata in model_manifestdf.iterrows(): + + if "fnet" in model_metadata["model_name"]: + + input_transforms = A.Compose([ + 
MinMaxNormalize(_normalization_factor=(2 ** 16) - 1, _always_apply=True), + CropNPixels(_pixel_count=1, _always_apply=True) + ]) + + else: + + input_transforms = A.Compose([ + MinMaxNormalize(_normalization_factor=(2 ** 16) - 1, _always_apply=True), + ]) + + target_transforms = copy.deepcopy(input_transforms) + + img_dataset = ImageDataset( + _input_dir=treated_dapi_crops, + _target_dir=gold_crops, + _input_transform=input_transforms, + _target_transform=target_transforms + ) + + # Same splitting procedure as in model trainers + train_size = int(0.7 * len(img_dataset)) + val_size = int(0.15 * len(img_dataset)) + test_size = len(img_dataset) - train_size - val_size + train_dataset, val_dataset, _ = random_split(img_dataset, [train_size, val_size, test_size]) + + with torch.no_grad(): + + generator_model = mlflow.pytorch.load_model(model_metadata["model_path"]).eval().to(device) + val_metric_counts = defaultdict(float) + train_metric_counts = defaultdict(float) + + for input, target in val_dataset: + target = target.unsqueeze(0).to(device) + output = generator_model(input.unsqueeze(0).to(device)) + + for loss_name, loss_func in loss_funcs.items(): + val_metric_counts[loss_name] += loss_func(_generated_outputs=output, _targets=target) + + for input, target in train_dataset: + target = target.unsqueeze(0).to(device) + output = generator_model(input.unsqueeze(0).to(device)) + + for loss_name, loss_func in loss_funcs.items(): + train_metric_counts[loss_name] += loss_func(_generated_outputs=output, _targets=target) + + losses["model_name"].append(model_metadata["model_name"]) + losses["model_name"].append(model_metadata["model_name"]) + losses["datasplit"].append("training") + losses["datasplit"].append("validation") + + for loss_name, loss_func in loss_funcs.items(): + losses[loss_name].append(train_metric_counts[loss_name].item() / len(train_dataset)) + losses[loss_name].append(val_metric_counts[loss_name].item() / len(val_dataset)) + + +# In[8]: + + +lossdf = pd.DataFrame(losses) +lossdf.to_csv(metrics_path / "best_model_metrics.csv") + + +# In[9]: + + +lossdf.head() + diff --git a/1.develop_vision_models/losses/AbstractLoss.py b/1.develop_vision_models/losses/AbstractLoss.py new file mode 100644 index 0000000..bc82737 --- /dev/null +++ b/1.develop_vision_models/losses/AbstractLoss.py @@ -0,0 +1,13 @@ +import torch.nn as nn +from abc import ABC, abstractmethod + +class AbstractLoss(nn.Module, ABC): + + @property + @abstractmethod + def metric_name(self): + pass + + @abstractmethod + def forward(self, x): + pass diff --git a/1.develop_vision_models/losses/L1Loss.py b/1.develop_vision_models/losses/L1Loss.py new file mode 100644 index 0000000..2dba90c --- /dev/null +++ b/1.develop_vision_models/losses/L1Loss.py @@ -0,0 +1,21 @@ +import torch +from AbstractLoss import AbstractLoss + + +class L1Loss(AbstractLoss): + + def __init__( + self, + _metric_name + ): + super(L1Loss, self).__init__() + + self.__metric_func = torch.nn.L1Loss(reduction="mean") + self.__metric_name = _metric_name + + def forward(self, _generated_outputs: torch.Tensor, _targets: torch.Tensor): + return self.__metric_func(_generated_outputs, _targets) + + @property + def metric_name(self): + return self.__metric_name diff --git a/1.develop_vision_models/losses/L2Loss.py b/1.develop_vision_models/losses/L2Loss.py new file mode 100644 index 0000000..5a24463 --- /dev/null +++ b/1.develop_vision_models/losses/L2Loss.py @@ -0,0 +1,21 @@ +import torch +from AbstractLoss import AbstractLoss + + +class L2Loss(AbstractLoss): + + def 
__init__(
+        self,
+        _metric_name
+    ):
+        super(L2Loss, self).__init__()
+
+        self.__metric_func = torch.nn.MSELoss(reduction="mean")
+        self.__metric_name = _metric_name
+
+    def forward(self, _generated_outputs: torch.Tensor, _targets: torch.Tensor):
+        return self.__metric_func(_generated_outputs, _targets)
+
+    @property
+    def metric_name(self):
+        return self.__metric_name
diff --git a/1.develop_vision_models/losses/PSNR.py b/1.develop_vision_models/losses/PSNR.py
new file mode 100644
index 0000000..22f4fdc
--- /dev/null
+++ b/1.develop_vision_models/losses/PSNR.py
@@ -0,0 +1,19 @@
+import torch
+from AbstractLoss import AbstractLoss
+
+class PSNR(AbstractLoss):
+    def __init__(self, _metric_name, _max_pixel_value = 1):
+        super(PSNR, self).__init__()
+
+        self.__metric_name = _metric_name
+        self.__max_pixel_value = _max_pixel_value
+
+    def forward(self, _generated_outputs: torch.Tensor, _targets: torch.Tensor):
+        mse = torch.mean((_generated_outputs - _targets) ** 2, dim=[2, 3])
+        psnr = torch.where(mse == 0, torch.tensor(0.0), 10 * torch.log10((self.__max_pixel_value ** 2) / mse))
+
+        return psnr.mean()
+
+    @property
+    def metric_name(self):
+        return self.__metric_name
diff --git a/1.develop_vision_models/losses/SSIM.py b/1.develop_vision_models/losses/SSIM.py
new file mode 100644
index 0000000..b0bb55e
--- /dev/null
+++ b/1.develop_vision_models/losses/SSIM.py
@@ -0,0 +1,28 @@
+import torch
+from AbstractLoss import AbstractLoss
+
+class SSIM(AbstractLoss):
+    def __init__(self, _metric_name, _max_pixel_value = 1):
+        super(SSIM, self).__init__()
+
+        self.__metric_name = _metric_name
+        self.__max_pixel_value = _max_pixel_value
+
+    def forward(self, _generated_outputs: torch.Tensor, _targets: torch.Tensor):
+        mu1 = _generated_outputs.mean(dim=[2, 3], keepdim=True)
+        mu2 = _targets.mean(dim=[2, 3], keepdim=True)
+
+        sigma1_sq = ((_generated_outputs - mu1) ** 2).mean(dim=[2, 3], keepdim=True)
+        sigma2_sq = ((_targets - mu2) ** 2).mean(dim=[2, 3], keepdim=True)
+        sigma12 = ((_generated_outputs - mu1) * (_targets - mu2)).mean(dim=[2, 3], keepdim=True)
+
+        C1 = (self.__max_pixel_value * 0.01) ** 2
+        C2 = (self.__max_pixel_value * 0.03) ** 2
+
+        ssim_value = ((2 * mu1 * mu2 + C1) * (2 * sigma12 + C2)) / ((mu1 ** 2 + mu2 ** 2 + C1) * (sigma1_sq + sigma2_sq + C2))
+
+        return ssim_value.mean()
+
+    @property
+    def metric_name(self):
+        return self.__metric_name

From 43a25d8acac97deff3bbea7521a85e0208eae8aa Mon Sep 17 00:00:00 2001
From: Cameron Mattson
Date: Fri, 1 Nov 2024 13:01:50 -0600
Subject: [PATCH 2/2] Removed indices for metric results

---
 .../evaluate_vision_models.ipynb          | 74 +++++++++----------
 .../model_metrics/best_model_metrics.csv  | 18 ++---
 .../nbconverted/evaluate_vision_models.py |  2 +-
 3 files changed, 47 insertions(+), 47 deletions(-)

diff --git a/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb b/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb
index 929b6e5..44a17ff 100644
--- a/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb
+++ b/1.develop_vision_models/evaluate_best_vision_models/evaluate_vision_models.ipynb
@@ -16,10 +16,10 @@
     "execution_count": 1,
     "metadata": {
      "execution": {
-      "iopub.execute_input": "2024-11-01T16:56:24.885320Z",
-      "iopub.status.busy": "2024-11-01T16:56:24.885224Z",
-      "iopub.status.idle": "2024-11-01T16:56:26.445305Z",
-      "shell.execute_reply": "2024-11-01T16:56:26.444935Z"
+      "iopub.execute_input": "2024-11-01T17:18:14.073278Z",
+      "iopub.status.busy": 
"2024-11-01T17:18:14.073182Z", + "iopub.status.idle": "2024-11-01T17:18:15.635993Z", + "shell.execute_reply": "2024-11-01T17:18:15.635626Z" }, "jukit_cell_id": "hiLBvSISaw" }, @@ -53,10 +53,10 @@ "execution_count": 2, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.448179Z", - "iopub.status.busy": "2024-11-01T16:56:26.447779Z", - "iopub.status.idle": "2024-11-01T16:56:26.450695Z", - "shell.execute_reply": "2024-11-01T16:56:26.450447Z" + "iopub.execute_input": "2024-11-01T17:18:15.639215Z", + "iopub.status.busy": "2024-11-01T17:18:15.638823Z", + "iopub.status.idle": "2024-11-01T17:18:15.641853Z", + "shell.execute_reply": "2024-11-01T17:18:15.641539Z" }, "jukit_cell_id": "Ko16YFvQXV" }, @@ -94,10 +94,10 @@ "execution_count": 3, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.452804Z", - "iopub.status.busy": "2024-11-01T16:56:26.452679Z", - "iopub.status.idle": "2024-11-01T16:56:26.465703Z", - "shell.execute_reply": "2024-11-01T16:56:26.465463Z" + "iopub.execute_input": "2024-11-01T17:18:15.643983Z", + "iopub.status.busy": "2024-11-01T17:18:15.643856Z", + "iopub.status.idle": "2024-11-01T17:18:15.657361Z", + "shell.execute_reply": "2024-11-01T17:18:15.657048Z" }, "jukit_cell_id": "RTGYNW7JLL" }, @@ -130,10 +130,10 @@ "execution_count": 4, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.467764Z", - "iopub.status.busy": "2024-11-01T16:56:26.467640Z", - "iopub.status.idle": "2024-11-01T16:56:26.498706Z", - "shell.execute_reply": "2024-11-01T16:56:26.498484Z" + "iopub.execute_input": "2024-11-01T17:18:15.659320Z", + "iopub.status.busy": "2024-11-01T17:18:15.659222Z", + "iopub.status.idle": "2024-11-01T17:18:15.690180Z", + "shell.execute_reply": "2024-11-01T17:18:15.689807Z" }, "jukit_cell_id": "goD9xIS6sd" }, @@ -171,10 +171,10 @@ "execution_count": 5, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.517905Z", - "iopub.status.busy": "2024-11-01T16:56:26.517780Z", - "iopub.status.idle": "2024-11-01T16:56:26.521598Z", - "shell.execute_reply": "2024-11-01T16:56:26.521352Z" + "iopub.execute_input": "2024-11-01T17:18:15.709328Z", + "iopub.status.busy": "2024-11-01T17:18:15.709191Z", + "iopub.status.idle": "2024-11-01T17:18:15.713552Z", + "shell.execute_reply": "2024-11-01T17:18:15.713245Z" }, "jukit_cell_id": "2hSr3QFUwp" }, @@ -204,10 +204,10 @@ "execution_count": 6, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.522838Z", - "iopub.status.busy": "2024-11-01T16:56:26.522742Z", - "iopub.status.idle": "2024-11-01T16:56:26.524540Z", - "shell.execute_reply": "2024-11-01T16:56:26.524294Z" + "iopub.execute_input": "2024-11-01T17:18:15.714917Z", + "iopub.status.busy": "2024-11-01T17:18:15.714710Z", + "iopub.status.idle": "2024-11-01T17:18:15.716579Z", + "shell.execute_reply": "2024-11-01T17:18:15.716275Z" }, "jukit_cell_id": "tf1RlhXNwo" }, @@ -231,10 +231,10 @@ "execution_count": 7, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T16:56:26.525590Z", - "iopub.status.busy": "2024-11-01T16:56:26.525497Z", - "iopub.status.idle": "2024-11-01T17:07:23.979777Z", - "shell.execute_reply": "2024-11-01T17:07:23.979283Z" + "iopub.execute_input": "2024-11-01T17:18:15.717713Z", + "iopub.status.busy": "2024-11-01T17:18:15.717557Z", + "iopub.status.idle": "2024-11-01T17:29:20.709704Z", + "shell.execute_reply": "2024-11-01T17:29:20.709321Z" }, "jukit_cell_id": "hOpgm7TTzF" }, @@ -316,17 +316,17 @@ "execution_count": 8, "metadata": { "execution": { - "iopub.execute_input": 
"2024-11-01T17:07:23.981844Z", - "iopub.status.busy": "2024-11-01T17:07:23.981693Z", - "iopub.status.idle": "2024-11-01T17:07:23.985134Z", - "shell.execute_reply": "2024-11-01T17:07:23.984807Z" + "iopub.execute_input": "2024-11-01T17:29:20.712038Z", + "iopub.status.busy": "2024-11-01T17:29:20.711891Z", + "iopub.status.idle": "2024-11-01T17:29:20.715391Z", + "shell.execute_reply": "2024-11-01T17:29:20.715132Z" }, "jukit_cell_id": "bwF17818lw" }, "outputs": [], "source": [ "lossdf = pd.DataFrame(losses)\n", - "lossdf.to_csv(metrics_path / \"best_model_metrics.csv\")" + "lossdf.to_csv(metrics_path / \"best_model_metrics.csv\", index=False)" ] }, { @@ -334,10 +334,10 @@ "execution_count": 9, "metadata": { "execution": { - "iopub.execute_input": "2024-11-01T17:07:23.986451Z", - "iopub.status.busy": "2024-11-01T17:07:23.986257Z", - "iopub.status.idle": "2024-11-01T17:07:23.991650Z", - "shell.execute_reply": "2024-11-01T17:07:23.991319Z" + "iopub.execute_input": "2024-11-01T17:29:20.717172Z", + "iopub.status.busy": "2024-11-01T17:29:20.717048Z", + "iopub.status.idle": "2024-11-01T17:29:20.722600Z", + "shell.execute_reply": "2024-11-01T17:29:20.722359Z" }, "jukit_cell_id": "qiKzCTm57t" }, diff --git a/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv b/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv index f07f34c..4d9e146 100644 --- a/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv +++ b/1.develop_vision_models/evaluate_best_vision_models/model_metrics/best_model_metrics.csv @@ -1,9 +1,9 @@ -,model_name,datasplit,l1_loss,l2_loss,psnr,ssim -0,unconditional_pix2pix_unchanged_background,training,0.0034311376551529812,7.297120127036144e-05,44.838642699166016,1.6488170918191034 -1,unconditional_pix2pix_unchanged_background,validation,0.003428399594877756,7.225046131177918e-05,44.83338485109128,1.6566267236049887 -2,fnet_unchanged_background_standard_scalar,training,0.2602008551885108,0.09239136524004458,11.097554197576372,0.14327376210590126 -3,fnet_unchanged_background_standard_scalar,validation,0.26012092953714655,0.09223497499638816,11.10260573459843,0.14195789074259488 -4,fnet_unchanged_background_min_max_normalized,training,0.00498064472912281,0.00011830444583770664,42.26198854997005,1.6008448932751234 -5,fnet_unchanged_background_min_max_normalized,validation,0.004959837722081085,0.00011675777697791311,42.24481238576497,1.6116350266436406 -6,wgan_unchanged_background,training,0.0034527258571323666,7.283252565255337e-05,44.85177452425932,1.6699889200225775 -7,wgan_unchanged_background,validation,0.0034577849859474116,7.054378570837176e-05,44.81343403935061,1.6787998686297172 +model_name,datasplit,l1_loss,l2_loss,psnr,ssim +unconditional_pix2pix_unchanged_background,training,0.0034311376551529812,7.297120127036144e-05,44.838642699166016,1.6488170918191034 +unconditional_pix2pix_unchanged_background,validation,0.003428399594877756,7.225046131177918e-05,44.83338485109128,1.6566267236049887 +fnet_unchanged_background_standard_scalar,training,0.2602008551885108,0.09239136524004458,11.097554197576372,0.14327376210590126 +fnet_unchanged_background_standard_scalar,validation,0.26012092953714655,0.09223497499638816,11.10260573459843,0.14195789074259488 +fnet_unchanged_background_min_max_normalized,training,0.00498064472912281,0.00011830444583770664,42.26198854997005,1.6008448932751234 
+fnet_unchanged_background_min_max_normalized,validation,0.004959837722081085,0.00011675777697791311,42.24481238576497,1.6116350266436406 +wgan_unchanged_background,training,0.0034527258571323666,7.283252565255337e-05,44.85177452425932,1.6699889200225775 +wgan_unchanged_background,validation,0.0034577849859474116,7.054378570837176e-05,44.81343403935061,1.6787998686297172 diff --git a/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py b/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py index 45496a9..a041301 100644 --- a/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py +++ b/1.develop_vision_models/evaluate_best_vision_models/nbconverted/evaluate_vision_models.py @@ -179,7 +179,7 @@ lossdf = pd.DataFrame(losses) -lossdf.to_csv(metrics_path / "best_model_metrics.csv") +lossdf.to_csv(metrics_path / "best_model_metrics.csv", index=False) # In[9]: