From 6e2ce390dcd22e6d0194dd1c9310e96950b706c5 Mon Sep 17 00:00:00 2001
From: Oliver King <11923466+OllyK@users.noreply.github.com>
Date: Mon, 30 Sep 2024 16:58:23 +0100
Subject: [PATCH 1/7] fix: changes to catalog_splitter.py to utilise less RAM
 for Colab use

---
 examples/lotssdr2/catalog_splitter.py | 38 +++++++++++++--------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/examples/lotssdr2/catalog_splitter.py b/examples/lotssdr2/catalog_splitter.py
index fbf836c..ff1ce4e 100644
--- a/examples/lotssdr2/catalog_splitter.py
+++ b/examples/lotssdr2/catalog_splitter.py
@@ -8,9 +8,8 @@
 
 def load_catalogue(catalogue_path):
     """Load the FITS catalogue using astropy Table and convert to a pandas DataFrame."""
-    table = Table.read(catalogue_path)
-    df = table.to_pandas()
-    return df
+    table = Table.read(catalogue_path, memmap=True)
+    return table
 
 
 def get_image_wcs_and_data(image_path):
@@ -21,21 +20,18 @@ def get_image_wcs_and_data(image_path):
     return wcs, image_data
 
 
-def save_sub_catalogue(df, output_path, overwrite=True):
+def save_sub_catalogue(table, output_path, overwrite=True):
     """Save the sub-catalogue from a pandas DataFrame to a new FITS file using astropy Table."""
-    table = Table.from_pandas(df)
     table.write(output_path, format="fits", overwrite=overwrite)
 
 
-def filter_objects_by_central_pixel(df, wcs, image_data):
+def filter_objects_by_central_pixel(table, wcs, image_data):
     """Filter objects within the RA and DEC boundaries and valid data regions using pandas."""
-    df = df.dropna(subset=["RA", "DEC"])
-
     # Convert RA and DEC to pixel coordinates
-    x, y = wcs.wcs_world2pix(df["RA"].values, df["DEC"].values, 0)
+    x, y = wcs.wcs_world2pix(table["RA"], table["DEC"], 0)
 
     # Initialize mask with all True values
-    valid_mask = np.ones(len(df), dtype=bool)
+    valid_mask = np.ones(len(table), dtype=bool)
 
     # Check bounds
     valid_mask &= (
@@ -49,21 +45,25 @@ def filter_objects_by_central_pixel(df, wcs, image_data):
     )
 
     # Filter DataFrame
-    valid_df = df[valid_mask]
-    print(f"STRONG FILTER: samples: {valid_df.shape[0]}")
+    valid_table = table[valid_mask]
+    print(f"STRONG FILTER: samples: {len(valid_table)}")
 
-    return valid_df
+    return valid_table
 
 
-def filter_by_mosaic_id(df, image_path):
-    df = df.dropna(subset=["RA", "DEC"])
+def filter_by_mosaic_id(table, image_path):
+    # Combine the RA/DEC NaN checks into one mask so it always matches len(table).
+    has_nan = np.zeros(len(table), dtype=bool)
+    for col in (c for c in (table["RA"], table["DEC"]) if c.info.dtype.kind == "f"):
+        has_nan |= np.isnan(col)
+    table = table[~has_nan]
     field_name = os.path.dirname(image_path).split("/")[-1]
-    original_sample_count = df.shape[0]
-    df = df.loc[df["Mosaic_ID"] == field_name.encode("UTF-8")]
+    original_sample_count = len(table)
+    table = table[table["Mosaic_ID"] == field_name.encode("UTF-8")]
     print(
-        f"SIMPLE FILTER: field_name: {field_name}; samples: {df.shape[0]}; original_sample_count: {original_sample_count}; sample_estimate: {int(1/841*original_sample_count)}"
+        f"SIMPLE FILTER: field_name: {field_name}; samples: {len(table)}; original_sample_count: {original_sample_count}; sample_estimate: {int(1/841*original_sample_count)}"
     )
-    return df
+    return table
 
 
 def main(catalogue_path, image_paths):

From b327a68aaaa824e42bb765fd1be8293028598d6e Mon Sep 17 00:00:00 2001
From: Oliver King <11923466+OllyK@users.noreply.github.com>
Date: Mon, 30 Sep 2024 17:09:02 +0100
Subject: [PATCH 2/7] fix: converts input arg for catalog_splitter to American
 dialect

---
 examples/lotssdr2/catalog_splitter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/lotssdr2/catalog_splitter.py b/examples/lotssdr2/catalog_splitter.py
index ff1ce4e..cf0e067 100644
--- a/examples/lotssdr2/catalog_splitter.py
+++ b/examples/lotssdr2/catalog_splitter.py
@@ -87,8 +87,8 @@ def main(catalogue_path, image_paths):
     argparser = argparse.ArgumentParser()
     argparser.add_argument(
         "-c",
-        "--catalogue_path",
-        help="Path to the FITS catalogue.",
+        "--catalog_path",
+        help="Path to the FITS catalog.",
         default="data/lotssdr2/combined-release-v1.1-LM_opt_mass.fits",
     )
     argparser.add_argument(
@@ -111,4 +111,4 @@ def main(catalogue_path, image_paths):
     else:
         image_paths = args.image_paths.split(",")
     print(image_paths)
-    main(args.catalogue_path, image_paths)
+    main(args.catalog_path, image_paths)

From 3ac1542efbb2aea355023371aae43928bfd06c93 Mon Sep 17 00:00:00 2001
From: Oliver King <11923466+OllyK@users.noreply.github.com>
Date: Mon, 30 Sep 2024 18:16:30 +0100
Subject: [PATCH 3/7] feat: adds a notebook that can be opened and run on Colab

---
 examples/lotssdr2/Create_LoTTSDataset.ipynb | 264 +++++++++++++++-----
 1 file changed, 203 insertions(+), 61 deletions(-)

diff --git a/examples/lotssdr2/Create_LoTTSDataset.ipynb b/examples/lotssdr2/Create_LoTTSDataset.ipynb
index 057e545..3c48fdb 100644
--- a/examples/lotssdr2/Create_LoTTSDataset.ipynb
+++ b/examples/lotssdr2/Create_LoTTSDataset.ipynb
@@ -1,64 +1,206 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e06b0a43-62e7-48b5-a009-e37e691024c9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from data import LoTTSDataset\n",
-    "from torchvision.transforms import v2\n",
-    "import torch\n",
-    "\n",
-    "transforms = v2.Compose(\n",
-    "    [\n",
-    "        v2.ToImage(),\n",
-    "        v2.ToDtype(torch.float32),\n",
-    "        v2.Resize(size=(64, 64)),\n",
-    "    ]\n",
-    ")\n",
-    "\n",
-    "data = LoTTSDataset(\n",
-    "    data_folder=\"./data/lotssdr2/public\",  # Change this to where you saved your data\n",
-    "    cutout_scaling=1.5,\n",
-    "    transform=transforms,\n",
-    ")\n",
-    "\n",
-    "for i in range(len(data)):\n",
-    "    if i > 10:\n",
-    "        break\n",
-    "    data.plot(\n",
-    "        i,\n",
-    "        contours=True,\n",
-    "        sigma_name=\"Isl_rms\",\n",
-    "        min_sigma=2,\n",
-    "        title=data.df.iloc[i][\"Source_Name\"] + data.df.iloc[i][\"S_Code\"],\n",
-    "    )\n",
-    "\n",
-    "data.df.head()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "authorship_tag": "ABX9TyOOak3TSj8ruaDx439hKsct",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
   },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.9"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/OllyK/Cata2Data/blob/colab/Copy_of_Create_LoTTS_Dataset.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Create a LoTTS Dataset Using Cata2Data"
+      ],
+      "metadata": {
+        "id": "0p1nWoeAcBL1"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "To start, create a local clone of this repository\n",
+        "\n",
+        "Install cata2data into your local environment (We recommend that you should use a venv on your local machine).\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "IewboBsacOya"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "4OjNn-aHGrye"
+      },
+      "outputs": [],
+      "source": [
+        "!git clone https://github.com/mb010/Cata2Data.git && pip install ./Cata2Data && cp Cata2Data/examples/lotssdr2/data.py ."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Download the data\n",
+        "\n",
+        "Use the `data_scrapper.py` script to download the image files. If you want to just download one pointing (instead of all 841 pointings; 434 GB), then call it using the --test flag:"
+      ],
+      "metadata": {
+        "id": "hszOwcQicuL7"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%%python Cata2Data/examples/lotssdr2/data_scrapper.py --dir downloaded_data/ --test"
+      ],
+      "metadata": {
+        "id": "lAC3ao-_H36q"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "This will have downloaded a .fits image file"
+      ],
+      "metadata": {
+        "id": "nRpXlcpgf3y4"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!ls downloaded_data/public/DR2/mosaics/P000+23/"
+      ],
+      "metadata": {
+        "id": "bEKXzeCNf3ZZ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Next, you need to download the catalog directly from the website at this link (3.9 GB). This dataloader is currently built to work with the [Radio-optical cross match](https://lofar-surveys.org/dr2_release.html#:~:text=Radio%2Doptical%20crossmatch%20catalogue) catalog described in [Hardcastle et al. 2023](https://arxiv.org/abs/2309.00102)."
+      ],
+      "metadata": {
+        "id": "E-GKT9tMgZmg"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!wget -P downloaded_data/ https://lofar-surveys.org/public/DR2/catalogues/combined-release-v1.1-LM_opt_mass.fits"
+      ],
+      "metadata": {
+        "id": "FH9feKhYgZNg"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Split the Catalogue\n",
+        "\n",
+        "This will take the full catalog and split it into one catalog per image and save those into the folder where each of those images is stored. This is what Cata2Data currently expects - lists of images and catalogs with equal length to use to construct a dataloader."
+      ],
+      "metadata": {
+        "id": "gD-CHCdVh4mk"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%%python /content/Cata2Data/examples/lotssdr2/catalog_splitter.py --catalog_path downloaded_data/combined-release-v1.1-LM_opt_mass.fits --image_paths downloaded_data/public/DR2/mosaics/P000+23/"
+      ],
+      "metadata": {
+        "id": "DOkJxKa4rFsI"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Construct the dataset\n",
+        "\n",
+        "Running the example cell below will construct a dataset from the data that has been downloaded. The LoTTSDataset class is imported from the [data.py file](https://github.com/mb010/Cata2Data/blob/main/examples/lotssdr2/data.py) before being populated with data from the `downloaded_data` directory. We then plot images for the first ten members of the dataset and print the first ten rows of the corresponding dataframe."
+      ],
+      "metadata": {
+        "id": "sISui8RyxNzY"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from data import LoTTSDataset\n",
+        "from torchvision.transforms import v2\n",
+        "import torch\n",
+        "\n",
+        "transforms = v2.Compose(\n",
+        "    [\n",
+        "        v2.ToImage(),\n",
+        "        v2.ToDtype(torch.float32),\n",
+        "        v2.Resize(size=(64, 64)),\n",
+        "    ]\n",
+        ")\n",
+        "\n",
+        "data = LoTTSDataset(\n",
+        "    data_folder=\"downloaded_data\",  # Change this to where you saved your data\n",
+        "    cutout_scaling=1.5,\n",
+        "    transform=transforms,\n",
+        ")\n",
+        "\n",
+        "for i in range(len(data)):\n",
+        "    if i > 10:\n",
+        "        break\n",
+        "    data.plot(\n",
+        "        i,\n",
+        "        contours=True,\n",
+        "        sigma_name=\"Isl_rms\",\n",
+        "        min_sigma=2,\n",
+        "        title=data.df.iloc[i][\"Source_Name\"] + data.df.iloc[i][\"S_Code\"],\n",
+        "    )\n",
+        "\n",
+        "data.df.head(10)"
+      ],
+      "metadata": {
+        "id": "AnbFg6PyVx21"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "HvmK7UdizYEo"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
 }

From 29f0986180704541b63e13a51271e2672af0de9b Mon Sep 17 00:00:00 2001
From: Oliver King <11923466+OllyK@users.noreply.github.com>
Date: Mon, 30 Sep 2024 18:29:28 +0100
Subject: [PATCH 4/7] fix: updates README to reflect Colab notebook

---
 examples/lotssdr2/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/lotssdr2/README.md b/examples/lotssdr2/README.md
index 05941d4..0e0c7d9 100644
--- a/examples/lotssdr2/README.md
+++ b/examples/lotssdr2/README.md
@@ -1,7 +1,7 @@
 # LOTSS DR2 Data Class
 
 This folder contains the utilities to produce a full dataloder for LOTSS DR2 using [Cata2Data](https://github.com/mb010/Cata2Data).
-The dataloader was initially developed for a different project. It serves to highlight how powerfull of a tool [Cata2Data](https://github.com/mb010/Cata2Data) can be.
+The dataloader was initially developed for a different project. It serves to highlight how powerful of a tool [Cata2Data](https://github.com/mb010/Cata2Data) can be.
 
 # Quick walkthrough:
 To start, create a local clone of this repository and navigate to this directory.
@@ -33,7 +33,7 @@ python catalog_splitter.py --catalog_path PATH_TO_THE_FULL_CATALOG --image_paths
 This will take the full catalog and split it into one catalog per image and save those into the folder where each of those images is stored. This is what Cata2Data currently expects - lists of images and catalogs with equal length to use to construct a dataloader.
 
 ## Construct the dataset
-A number of decisions have been made in the selection of sources etc, but in general everything is in [the data.py file](data.py). To run the code below you can install Jupyter to your environment using `pip install notebook` then open a Jupyter notebook using the command `jupyter notebook Create_LoTTSDataset.ipynb`.
+A number of decisions have been made in the selection of sources etc, but in general everything is in [the data.py file](data.py). To run the code below you can run the `Create_LoTTSDataset.ipynb` notebook in Colab.
 
 ```python
 from data import LoTTSDataset

From 86f590d8a22c41602e3b45572c6472b3c9d50df7 Mon Sep 17 00:00:00 2001
From: Oliver King <11923466+OllyK@users.noreply.github.com>
Date: Mon, 30 Sep 2024 18:30:10 +0100
Subject: [PATCH 5/7] fix: updates path to 'open in colab' badge to reflect new
 file location

---
 examples/lotssdr2/Create_LoTTSDataset.ipynb | 156 ++++++++++----------
 1 file changed, 79 insertions(+), 77 deletions(-)

diff --git a/examples/lotssdr2/Create_LoTTSDataset.ipynb b/examples/lotssdr2/Create_LoTTSDataset.ipynb
index 3c48fdb..7917eb1 100644
--- a/examples/lotssdr2/Create_LoTTSDataset.ipynb
+++ b/examples/lotssdr2/Create_LoTTSDataset.ipynb
@@ -1,51 +1,37 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": [],
-      "authorship_tag": "ABX9TyOOak3TSj8ruaDx439hKsct",
-      "include_colab_link": true
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    }
-  },
   "cells": [
     {
       "cell_type": "markdown",
       "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
+        "colab_type": "text",
+        "id": "view-in-github"
       },
       "source": [
-        "<a href=\"https://colab.research.google.com/github/OllyK/Cata2Data/blob/colab/Copy_of_Create_LoTTS_Dataset.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+        "<a target=\"_blank\" href=\"https://colab.research.google.com/github/mb010/Cata2Data/blob/main/examples/lotssdr2/Create_LoTTSDataset.ipynb\">\n",
+        "  <img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/>\n",
+        "</a>"
       ]
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "## Create a LoTTS Dataset Using Cata2Data"
-      ],
       "metadata": {
         "id": "0p1nWoeAcBL1"
-      }
+      },
+      "source": [
+        "## Create a LoTTS Dataset Using Cata2Data"
+      ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "IewboBsacOya"
+      },
       "source": [
         "To start, create a local clone of this repository\n",
         "\n",
         "Install cata2data into your local environment (We recommend that you should use a venv on your local machine).\n",
         "\n"
-      ],
-      "metadata": {
-        "id": "IewboBsacOya"
-      }
+      ]
     },
     {
       "cell_type": "code",
@@ -60,101 +46,106 @@
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "hszOwcQicuL7"
+      },
       "source": [
         "## Download the data\n",
         "\n",
         "Use the `data_scrapper.py` script to download the image files. If you want to just download one pointing (instead of all 841 pointings; 434 GB), then call it using the --test flag:"
-      ],
-      "metadata": {
-        "id": "hszOwcQicuL7"
-      }
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "%%python Cata2Data/examples/lotssdr2/data_scrapper.py --dir downloaded_data/ --test"
-      ],
+      "execution_count": null,
       "metadata": {
         "id": "lAC3ao-_H36q"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "%%python Cata2Data/examples/lotssdr2/data_scrapper.py --dir downloaded_data/ --test"
+      ]
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "This will have downloaded a .fits image file"
-      ],
       "metadata": {
         "id": "nRpXlcpgf3y4"
-      }
+      },
+      "source": [
+        "This will have downloaded a .fits image file"
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "!ls downloaded_data/public/DR2/mosaics/P000+23/"
-      ],
+      "execution_count": null,
       "metadata": {
         "id": "bEKXzeCNf3ZZ"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "!ls downloaded_data/public/DR2/mosaics/P000+23/"
+      ]
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "Next, you need to download the catalog directly from the website at this link (3.9 GB). This dataloader is currently built to work with the [Radio-optical cross match](https://lofar-surveys.org/dr2_release.html#:~:text=Radio%2Doptical%20crossmatch%20catalogue) catalog described in [Hardcastle et al. 2023](https://arxiv.org/abs/2309.00102)."
-      ],
       "metadata": {
         "id": "E-GKT9tMgZmg"
-      }
+      },
+      "source": [
+        "Next, you need to download the catalog directly from the website at this link (3.9 GB). This dataloader is currently built to work with the [Radio-optical cross match](https://lofar-surveys.org/dr2_release.html#:~:text=Radio%2Doptical%20crossmatch%20catalogue) catalog described in [Hardcastle et al. 2023](https://arxiv.org/abs/2309.00102)."
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "!wget -P downloaded_data/ https://lofar-surveys.org/public/DR2/catalogues/combined-release-v1.1-LM_opt_mass.fits"
-      ],
+      "execution_count": null,
       "metadata": {
         "id": "FH9feKhYgZNg"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "!wget -P downloaded_data/ https://lofar-surveys.org/public/DR2/catalogues/combined-release-v1.1-LM_opt_mass.fits"
+      ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "gD-CHCdVh4mk"
+      },
       "source": [
         "## Split the Catalogue\n",
         "\n",
         "This will take the full catalog and split it into one catalog per image and save those into the folder where each of those images is stored. This is what Cata2Data currently expects - lists of images and catalogs with equal length to use to construct a dataloader."
-      ],
-      "metadata": {
-        "id": "gD-CHCdVh4mk"
-      }
+      ]
     },
     {
       "cell_type": "code",
-      "source": [
-        "%%python /content/Cata2Data/examples/lotssdr2/catalog_splitter.py --catalog_path downloaded_data/combined-release-v1.1-LM_opt_mass.fits --image_paths downloaded_data/public/DR2/mosaics/P000+23/"
-      ],
+      "execution_count": null,
       "metadata": {
         "id": "DOkJxKa4rFsI"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": [
+        "%%python /content/Cata2Data/examples/lotssdr2/catalog_splitter.py --catalog_path downloaded_data/combined-release-v1.1-LM_opt_mass.fits --image_paths downloaded_data/public/DR2/mosaics/P000+23/"
+      ]
     },
     {
       "cell_type": "markdown",
+      "metadata": {
+        "id": "sISui8RyxNzY"
+      },
       "source": [
         "## Construct the dataset\n",
         "\n",
         "Running the example cell below will construct a dataset from the data that has been downloaded. The LoTTSDataset class is imported from the [data.py file](https://github.com/mb010/Cata2Data/blob/main/examples/lotssdr2/data.py) before being populated with data from the `downloaded_data` directory. We then plot images for the first ten members of the dataset and print the first ten rows of the corresponding dataframe."
-      ],
-      "metadata": {
-        "id": "sISui8RyxNzY"
-      }
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "AnbFg6PyVx21"
+      },
+      "outputs": [],
       "source": [
         "from data import LoTTSDataset\n",
         "from torchvision.transforms import v2\n",
@@ -186,21 +177,32 @@
         "    )\n",
         "\n",
         "data.df.head(10)"
-      ],
-      "metadata": {
-        "id": "AnbFg6PyVx21"
-      },
-      "execution_count": null,
-      "outputs": []
+      ]
     },
     {
       "cell_type": "code",
-      "source": [],
+      "execution_count": null,
       "metadata": {
         "id": "HvmK7UdizYEo"
       },
-      "execution_count": null,
-      "outputs": []
+      "outputs": [],
+      "source": []
     }
-  ]
+  ],
+  "metadata": {
+    "colab": {
+      "authorship_tag": "ABX9TyOOak3TSj8ruaDx439hKsct",
+      "include_colab_link": true,
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
 }

From 5a638c25d5b150ad63ac179e82db299e93efbb57 Mon Sep 17 00:00:00 2001
From: Olly King <11923466+OllyK@users.noreply.github.com>
Date: Tue, 1 Oct 2024 15:20:11 +0100
Subject: [PATCH 6/7] Apply suggestions from code review

Co-authored-by: Micah Bowles <micah.bowles@postgrad.manchester.ac.uk>
---
 examples/lotssdr2/Create_LoTTSDataset.ipynb | 6 +++---
 examples/lotssdr2/README.md                 | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/lotssdr2/Create_LoTTSDataset.ipynb b/examples/lotssdr2/Create_LoTTSDataset.ipynb
index 7917eb1..a761048 100644
--- a/examples/lotssdr2/Create_LoTTSDataset.ipynb
+++ b/examples/lotssdr2/Create_LoTTSDataset.ipynb
@@ -29,7 +29,7 @@
       "source": [
         "To start, create a local clone of this repository\n",
         "\n",
-        "Install cata2data into your local environment (We recommend that you should use a venv on your local machine).\n",
+        "Install cata2data into your environment (we recommend using a venv on your local machine).\n",
         "\n"
       ]
     },
@@ -72,7 +72,7 @@
         "id": "nRpXlcpgf3y4"
       },
       "source": [
-        "This will have downloaded a .fits image file"
+        "This will have downloaded a `.fits` image file"
       ]
     },
     {
@@ -92,7 +92,7 @@
         "id": "E-GKT9tMgZmg"
       },
       "source": [
-        "Next, you need to download the catalog directly from the website at this link (3.9 GB). This dataloader is currently built to work with the [Radio-optical cross match](https://lofar-surveys.org/dr2_release.html#:~:text=Radio%2Doptical%20crossmatch%20catalogue) catalog described in [Hardcastle et al. 2023](https://arxiv.org/abs/2309.00102)."
+        "Next, download the catalog (3.9 GB) directly from the LOFAR surveys website using the command in the cell below. This example dataloader is currently built to work with the [Radio-optical cross match](https://lofar-surveys.org/dr2_release.html#:~:text=Radio%2Doptical%20crossmatch%20catalogue) catalog described in [Hardcastle et al. 2023](https://arxiv.org/abs/2309.00102)."
       ]
     },
     {
diff --git a/examples/lotssdr2/README.md b/examples/lotssdr2/README.md
index 0e0c7d9..66d35ec 100644
--- a/examples/lotssdr2/README.md
+++ b/examples/lotssdr2/README.md
@@ -33,7 +33,7 @@ python catalog_splitter.py --catalog_path PATH_TO_THE_FULL_CATALOG --image_paths
 This will take the full catalog and split it into one catalog per image and save those into the folder where each of those images is stored. This is what Cata2Data currently expects - lists of images and catalogs with equal length to use to construct a dataloader.
 
 ## Construct the dataset
-A number of decisions have been made in the selection of sources etc, but in general everything is in [the data.py file](data.py). To run the code below you can run the `Create_LoTTSDataset.ipynb` notebook in Colab.
+A number of decisions have been made in the selection of sources etc, but in general everything is in [the data.py file](data.py). To run the code below you can run the `Create_LoTTSDataset.ipynb` [notebook in Colab](https://colab.research.google.com/github/mb010/Cata2Data/blob/main/examples/lotssdr2/Create_LoTTSDataset.ipynb).
 
 ```python
 from data import LoTTSDataset

From f20de83cc966016d55b7a8c6f48a2f8bc29defc3 Mon Sep 17 00:00:00 2001
From: Olly King <11923466+OllyK@users.noreply.github.com>
Date: Tue, 1 Oct 2024 15:26:05 +0100
Subject: [PATCH 7/7] Update Create_LoTTSDataset.ipynb

Fixes path to be more general as suggested
---
 examples/lotssdr2/Create_LoTTSDataset.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/lotssdr2/Create_LoTTSDataset.ipynb b/examples/lotssdr2/Create_LoTTSDataset.ipynb
index a761048..817a691 100644
--- a/examples/lotssdr2/Create_LoTTSDataset.ipynb
+++ b/examples/lotssdr2/Create_LoTTSDataset.ipynb
@@ -83,7 +83,7 @@
       },
       "outputs": [],
       "source": [
-        "!ls downloaded_data/public/DR2/mosaics/P000+23/"
+        "!ls downloaded_data/public/DR2/mosaics/*"
       ]
     },
     {