From 5e658b82d8b17375a65080da0d07b70e09e627a3 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Fri, 15 Mar 2024 18:06:45 +0100
Subject: [PATCH 01/24] typos and broken links
---
src/preprocessing.py | 32 ++++++++++++--------------------
1 file changed, 12 insertions(+), 20 deletions(-)
diff --git a/src/preprocessing.py b/src/preprocessing.py
index e5f9ed8..30a0bfa 100644
--- a/src/preprocessing.py
+++ b/src/preprocessing.py
@@ -25,7 +25,6 @@
# 1. **Organize** your neuroimaging data.
# 2. **Preprocess** your neuroimaging data.
# 3. Check the preprocessing **quality**.
-# 4. **Prepare data** by extracting tensors from your preprocessed data.
# %% [markdown]
# ## Organization of neuroimaging data: the Brain Imaging Data Structure (BIDS)
@@ -128,7 +127,6 @@
# and especially before training a neural network with that data.
# - **Registration** helps to standardize the neuroimaging data so that it is
# consistent across different subjects, scanners, and imaging modalities. This
-
# makes it easier for the deep neural network to learn patterns and make
# accurate predictions.
# - Preprocessing techniques such as **motion correction** and **noise
@@ -171,17 +169,17 @@
# %% [markdown]
# This notebook presents three possible preprocessing steps using the [Clinica](https://www.clinica.run/doc/)
# software:
-# - `t1-linear`: Affine registration of T1w images to the MNI standard space
-# - `t1-volume`: Volume-based processing of T1-weighted MR images with SPM
+# - `t1-linear`: Affine registration of T1w images to the MNI standard space,
+# - `t1-volume`: Volume-based processing of T1w images with SPM,
# - `pet-linear`: Spatial normalization to the MNI space and intensity
-# normalization of PET images
+# normalization of PET images.
# %% [markdown]
#
# ## Image preprocessing with the `t1-linear` pipeline
# For this tutorial, we propose a "minimal preprocessing" (as described in [(Wen
# et al., 2020)](https://doi.org/10.1016/j.media.2020.101694)) implemented in
-# the [`t1-linear` pipeline](http://www.clinica.run/doc/Pipelines/T1_Linear/)
+# the [`t1-linear` pipeline](https://aramislab.paris.inria.fr/clinica/docs/public/latest/Pipelines/T1_Linear/)
# using the [ANTs](http://stnava.github.io/ANTs/) software package [(Avants et
# al., 2014)](https://doi.org/10.3389/fninf.2014.00044). This preprocessing
# includes:
@@ -212,9 +210,9 @@
# where:
# - `bids_directory` is the input folder containing the dataset in a
-# [BIDS](http://www.clinica.run/doc/BIDS/) hierarchy,
+# [BIDS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/BIDS/) hierarchy,
# - `caps_directory` is the output folder containing the results in a
-# [CAPS](http://www.clinica.run/doc/CAPS/) hierarchy.
+# [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy.
# %% [markdown]
# ```{note}
@@ -236,7 +234,7 @@
# `_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz`.
# %% [markdown]
# (If you failed to obtain the preprocessing using the `t1-linear` pipeline,
-# please uncomment the next cell)
+# please uncomment the next cell.)
# %%
# # !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/CAPS_example.tar.gz -o CAPS_example.tar.gz
# # !tar xf CAPS_example.tar.gz
@@ -298,9 +296,9 @@
# where:
# - `bids_directory` is the input folder containing the dataset in a
-# [BIDS](http://www.clinica.run/doc/BIDS/) hierarchy;
+# [BIDS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/BIDS/) hierarchy;
# - `caps_director` is the output folder containing the results in a
-# [CAPS](http://www.clinica.run/doc/CAPS/) hierarchy;
+# [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy;
# - `acq_label` is the label given to the PET acquisition, specifying the tracer
# used (trc-). It can be for instance '18FFDG' for
# 18F-fluorodeoxyglucose or '18FAV45' for 18F-florbetapir;
@@ -323,14 +321,8 @@
# ```
# %% [markdown]
# ### Run the pipeline
-# Start by downloading a dataset of PET images for 4 subjects from ADNI
-# database. The dataset was converted to the BIDS specification using `clinica
-# convert adni-to-bids`.
-
-# %%[markdown]
# Please uncomment the next cells to download a dataset of pet images of 4
-# subjects from ADNI in a BIDS format (convert to BIDS with `clinica convert
-# adni-to-bids`)
+# subjects from ADNI in BIDS format (converted to BIDS with `clinica convert adni-to-bids`).
# %%
!curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_adni/BIDS_example.tar.gz -o adniBids.tar.gz
@@ -403,11 +395,11 @@
# - `preprocessing` corresponds to the preprocessing pipeline whose outputs will
# be checked (`t1-linear` or `pet-linear` or `t1-volume`),
# - `caps_directory` is the folder containing the results of the preprocessing
-# pipeline in a [CAPS](http://www.clinica.run/doc/CAPS/Introduction/) hierarchy,
+# pipeline in a [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy,
# - `output_path` is the path to the output TSV file (or directory for
# `t1-volume`) containing QC results.
-##
+# %% [markdown]
# ```{note}
# Quality checks pipelines are all different and depend on the chosen
# preprocessing. They should not be applied to other preprocessing procedures as
From f0e9f8586ad0a0ffb9d1eb668ccf71e88f6c5b78 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Mon, 18 Mar 2024 13:25:48 +0100
Subject: [PATCH 02/24] more typos
---
src/label_extraction.py | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/label_extraction.py b/src/label_extraction.py
index 24a0043..a39f2fb 100644
--- a/src/label_extraction.py
+++ b/src/label_extraction.py
@@ -124,7 +124,7 @@
# of the sessions, for now.
#
# The whole preprocessing process has been run for you on these datasets. The
-# results of the [quality check procedure](./preprocessing.ipynb#quality-check-of-your-preprocessed-data) have been used
+# results of the [quality check procedure](./preprocessing.html#quality-check-of-your-preprocessed-data) have been used
# to filter sessions. `data_oasis/oasis_after_qc.tsv` and `data_adni/adni_after_qc.tsv`
# store the list of the sessions that have been accepted for each dataset.
#
@@ -136,13 +136,13 @@
#for OASIS-1 dataset
!curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/iotools_output.tar.gz -o iotools_output.tar.gz
!tar xf iotools_output.tar.gz
-!curl https://raw.githubusercontent.com/aramis-lab/clinicadl_handbook/main/data/oasis_after_qc.tsv -O data_oasis/oasis_after_qc.tsv
+!curl https://raw.githubusercontent.com/aramis-lab/clinicadl_handbook/main/data/oasis_after_qc.tsv --output data_oasis/oasis_after_qc.tsv
# %%
#for the ADNI dataset
!curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_adni/iotools_output.tar.gz -o iotools_output.tar.gz
!tar xf iotools_output.tar.gz
-!curl https://raw.githubusercontent.com/aramis-lab/clinicadl_handbook/main/data/adni_after_qc.tsv -O data_adni/adni_after_qc.tsv
+!curl https://raw.githubusercontent.com/aramis-lab/clinicadl_handbook/main/data/adni_after_qc.tsv --output data_adni/adni_after_qc.tsv
# %% [markdown]
# ### Get the labels
@@ -151,12 +151,12 @@
# MCI) can be extracted with ClinicaDL using the command:
#
# ```bash
-# clinicadl tsvtools get-labels bids_directory results_tsv
+# clinicadl tsvtools get-labels bids_directory results_tsv
# ```
# where:
# - `bids_directory` the input folder containing the dataset in a BIDS
# hierarchy.
-# - `results_path` is the path to the tsv file.
+# - `results_tsv` is the path to the tsv file.
# ```{tip}
# You can increase the verbosity of the command by adding -v flag(s).
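This patch mentions that the quality-check TSVs (`oasis_after_qc.tsv`, `adni_after_qc.tsv`) were used to keep only the accepted sessions. A minimal stdlib sketch of such filtering, assuming TSV files with `participant_id` and `session_id` columns (a hypothetical helper for illustration, not part of ClinicaDL):

```python
import csv
from pathlib import Path

def filter_labels_by_qc(labels_tsv: Path, qc_tsv: Path, out_tsv: Path) -> int:
    """Keep only the (participant, session) pairs listed in the QC file."""
    with open(qc_tsv, newline="") as f:
        accepted = {
            (row["participant_id"], row["session_id"])
            for row in csv.DictReader(f, delimiter="\t")
        }
    with open(labels_tsv, newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")
        kept = [r for r in reader
                if (r["participant_id"], r["session_id"]) in accepted]
        fieldnames = reader.fieldnames
    with open(out_tsv, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter="\t")
        writer.writeheader()
        writer.writerows(kept)
    return len(kept)
```

The same pattern works for any per-session filter; the actual column names should be checked against the TSV headers produced by the pipeline.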
From 5d8bc0b49bb541435e521651a61f3d673229f2d8 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Mon, 18 Mar 2024 15:00:37 +0100
Subject: [PATCH 03/24] more typos and layout issues
---
src/label_extraction.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/label_extraction.py b/src/label_extraction.py
index a39f2fb..585c956 100644
--- a/src/label_extraction.py
+++ b/src/label_extraction.py
@@ -318,8 +318,8 @@ def display_table(table_path):
# clinicadl tsvtools get-progression [OPTIONS] DATA_TSV
# ```
# with :
-# - `DATA_TSV` (str) is the TSV file containing the data (output of clinicadl
-# tsvtools get-labels|split|kfold).
+# - `DATA_TSV` (str) is the TSV file containing the data (output of `clinicadl
+# tsvtools get-labels|split|kfold`).
# - `--time_horizon` (int) can be added: It is the time horizon in months that
# is used to assess the stability of the MCI subjects. Default value: 36.
@@ -376,7 +376,7 @@ def display_table(table_path):
# ```
# where:
# - `data_tsv` is the TSV file with the data that are going to be split
-# (output of `clinicadl tsvtools getlabels|split|kfold`).
+# (output of `clinicadl tsvtools get-labels|split|kfold`).
#
# Each diagnosis label is split independently. Random splits are generated
# until the differences between age and sex distributions between the test
From bcc244fe43ea79e39f058a311437f688505012a2 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Mon, 18 Mar 2024 15:01:00 +0100
Subject: [PATCH 04/24] reduce the size of the test set
---
src/label_extraction.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/label_extraction.py b/src/label_extraction.py
index 585c956..a4a8c23 100644
--- a/src/label_extraction.py
+++ b/src/label_extraction.py
@@ -392,7 +392,7 @@ def display_table(table_path):
# Let's create a test set including 20 subjects:
# %%
-!clinicadl tsvtools split data_oasis/labels.tsv --n_test 20 --subset_name test
+!clinicadl tsvtools split data_oasis/labels.tsv --n_test 0.2 --subset_name test
# %%
# for Adni dataset
From ff06f79b362daf888246abe3235d914ef65e6892 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Mon, 18 Mar 2024 15:14:31 +0100
Subject: [PATCH 05/24] typos
---
src/label_extraction.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/label_extraction.py b/src/label_extraction.py
index a4a8c23..dc34c7c 100644
--- a/src/label_extraction.py
+++ b/src/label_extraction.py
@@ -437,7 +437,7 @@ def display_table(table_path):
# clinicadl tsvtool kfold
# ```
#
-# where `formatted_data_path` is the output tsv file of `clinicadl tsvtool getlabels|split|kfold`.
+# where `formatted_data_path` is the output tsv file of `clinicadl tsvtool get-labels|split|kfold`.
# In a similar way as for the test split, three tsv files are written
# **per split** for each set:
From b03a884845ceb7f19ae17d0dfaadd714ee8dcd3b Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Mon, 18 Mar 2024 15:17:13 +0100
Subject: [PATCH 06/24] correction on the number of splits for kfold cross
validation
---
src/label_extraction.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/label_extraction.py b/src/label_extraction.py
index dc34c7c..78dead6 100644
--- a/src/label_extraction.py
+++ b/src/label_extraction.py
@@ -451,11 +451,11 @@ def display_table(table_path):
# across the results of the 5 folds already reduces bias compared to a single
# data split.
# %%
-!clinicadl tsvtools kfold data_oasis/split/train.tsv --n_splits 4 --subset_name validation
+!clinicadl tsvtools kfold data_oasis/split/train.tsv --n_splits 5 --subset_name validation
# %%
# for ADNI dataset
-!clinicadl tsvtools kfold data_adni/split/train.tsv --n_splits 4 --subset_name validation
+!clinicadl tsvtools kfold data_adni/split/train.tsv --n_splits 5 --subset_name validation
# %% [markdown]
# ### Check the absence of data leakage
#
From 8d5353414d1286acea99ae8c92b523a589efb889 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Mon, 18 Mar 2024 15:27:08 +0100
Subject: [PATCH 07/24] correction of bug in data leakage check
---
src/label_extraction.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/label_extraction.py b/src/label_extraction.py
index 78dead6..e478871 100644
--- a/src/label_extraction.py
+++ b/src/label_extraction.py
@@ -520,7 +520,7 @@ def check_is_independent(train_path_baseline: Path, test_path_baseline: Path):
def run_test_suite(data_tsv: Path, n_splits: int):
- _run_test_suite_no_split(data_tsv) if n_splits == 0 else _run_test_suite_multiple_splits(data_tsv)
+ _run_test_suite_no_split(data_tsv) if n_splits == 0 else _run_test_suite_multiple_splits(data_tsv, n_splits)
def _run_test_suite_no_split(data_tsv: Path):
@@ -535,7 +535,7 @@ def _run_test_suite_no_split(data_tsv: Path):
check_is_independent(train_baseline_tsv, test_baseline_tsv)
-def _run_test_suite_multiple_splits(data_tsv: Path):
+def _run_test_suite_multiple_splits(data_tsv: Path, n_splits: int):
for _ in range(n_splits):
for folder, _, files in os.walk(data_tsv):
folder = Path(folder)
@@ -554,7 +554,7 @@ def _run_test_suite_multiple_splits(data_tsv: Path):
run_test_suite(Path("./data_oasis/split"), n_splits=0)
# Run check for train / validation splits
-run_test_suite(Path("./data_oasis/split/4_fold"), n_splits=4)
+run_test_suite(Path("./data_oasis/split/5_fold"), n_splits=5)
# %% [markdown]
# If no Error was raised then none of the three conditions was broken. It is now
# possible to use the train and the validation sets to perform a classification
From 2d2e85043ef89045f5e343e0595a9d34fbc3e390 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Mon, 18 Mar 2024 15:29:11 +0100
Subject: [PATCH 08/24] typos
---
src/label_extraction.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/label_extraction.py b/src/label_extraction.py
index e478871..a44e7e2 100644
--- a/src/label_extraction.py
+++ b/src/label_extraction.py
@@ -556,7 +556,7 @@ def _run_test_suite_multiple_splits(data_tsv: Path, n_splits: int):
# Run check for train / validation splits
run_test_suite(Path("./data_oasis/split/5_fold"), n_splits=5)
# %% [markdown]
-# If no Error was raised then none of the three conditions was broken. It is now
+# If no error was raised, then none of the three conditions was broken. It is now
# possible to use the train and the validation sets to perform a classification
# task, and then to evaluate correctly the performance of the classifier on the
# test set.
@@ -572,6 +572,6 @@ def _run_test_suite_multiple_splits(data_tsv: Path, n_splits: int):
#
# %% [markdown]
-# Now that you have your train, test and validation split, you can train a
+# Now that you have your train, test and validation splits, you can train a
# network for classification, regression or reconstruction with clinicaDL.
# %%
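The `split` and `kfold` commands in these patches assign whole subjects, not individual sessions, to a subset, which is what prevents the data leakage checked later. A minimal sketch of such a subject-level fold assignment (illustrative only, not ClinicaDL's implementation):

```python
import random
from collections import defaultdict

def subject_level_kfold(sessions, n_splits=5, seed=0):
    """Assign folds per subject so all sessions of a subject share a fold.

    `sessions` is a list of (participant_id, session_id) pairs.
    Returns a dict mapping fold index -> list of (participant, session) pairs.
    """
    subjects = sorted({participant for participant, _ in sessions})
    rng = random.Random(seed)
    rng.shuffle(subjects)
    # round-robin assignment of shuffled subjects to folds
    fold_of = {subject: i % n_splits for i, subject in enumerate(subjects)}
    folds = defaultdict(list)
    for participant, session in sessions:
        folds[fold_of[participant]].append((participant, session))
    return dict(folds)
```

Because the fold is chosen per subject, a participant with several sessions can never end up in both the train and the validation set of the same split.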
From 26f84cbb79a4f01cb99503e073baa5c5c99e877b Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Tue, 19 Mar 2024 09:41:55 +0100
Subject: [PATCH 09/24] typos
---
src/generate.py | 20 ++++++++++----------
src/label_extraction.py | 2 +-
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/generate.py b/src/generate.py
index 3fcfe84..652f9f3 100644
--- a/src/generate.py
+++ b/src/generate.py
@@ -20,22 +20,22 @@
# # Generate a synthetic dataset
#
-# When looking for new networks architecture to improve the performance of the
-# deep learning tasks implies to tests different sets of hyperparameters. This
-# takes a lot of time and frequently we finish with networks that don't
+# Looking for new network architectures to improve performance on a
+# deep learning task implies testing different sets of hyperparameters. This
+# takes a lot of time and we often end up with networks that don't
# converge. To avoid this pitfall, it is often advised to simplify the problem:
-# focus on a subset of data / classification task that is more tractable than
+# focus on a subset of data or a task that is more tractable than
# the one that is currently explored. This is the purpose of `clinicadl
# generate` which creates synthetic, tractable data from real data to
# check that developed networks are working on this simple case before going
# further.
#
-# With Clinicadl, you can generate three types of synthetic data sets for a
-# binary classification depending on the option chosen: trivial, random or
-# shepplogan.
+# With ClinicaDL, you can generate three types of synthetic datasets for a
+# binary classification task depending on the option chosen: `trivial`, `random` or
+# `shepplogan`.
#
# If you ran the previous notebook, you must have a folder called
-# `CAPS_example` in the data_oasis directory (otherwise uncomment the next cell
+# `CAPS_example` in the `data_oasis` directory (otherwise uncomment the next cell
# to download a local version of the necessary folders).
# %%
# !curl -k https://aramislab.paris.inria.fr/clinicadl/files/data/handbook_2023/data_oasis/CAPS_example.tar.gz -o oasisCaps.tar.gz
@@ -68,8 +68,8 @@
# where:
# - `caps_directory` is the output folder containing the results in a
-# [CAPS](http://www.clinica.run/doc/CAPS/) hierarchy.
-# - `output_directory` is the folder where the synthetic CAPS is stored.
+# [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy,
+# - `output_directory` is the folder where the synthetic CAPS is stored,
# - `n_subjects` is the number of subjects per label in the synthetic dataset.
# Default value: 300.
diff --git a/src/label_extraction.py b/src/label_extraction.py
index a44e7e2..083b948 100644
--- a/src/label_extraction.py
+++ b/src/label_extraction.py
@@ -573,5 +573,5 @@ def _run_test_suite_multiple_splits(data_tsv: Path, n_splits: int):
# %% [markdown]
# Now that you have your train, test and validation splits, you can train a
-# network for classification, regression or reconstruction with clinicaDL.
+# network for classification, regression or reconstruction with ClinicaDL.
# %%
From b25135062d3ab707a68a3293cb19562001b31fea Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Tue, 19 Mar 2024 09:46:53 +0100
Subject: [PATCH 10/24] remove superfluous line in data leakage check
---
src/label_extraction.py | 27 +++++++++++++--------------
1 file changed, 13 insertions(+), 14 deletions(-)
diff --git a/src/label_extraction.py b/src/label_extraction.py
index 083b948..e07a010 100644
--- a/src/label_extraction.py
+++ b/src/label_extraction.py
@@ -390,7 +390,7 @@ def display_table(table_path):
# In OASIS there is no longitudinal follow-up, hence the last two TSV files are
# identical.
-# Let's create a test set including 20 subjects:
+# Let's create a test set including 20% of the subjects:
# %%
!clinicadl tsvtools split data_oasis/labels.tsv --n_test 0.2 --subset_name test
@@ -520,7 +520,7 @@ def check_is_independent(train_path_baseline: Path, test_path_baseline: Path):
def run_test_suite(data_tsv: Path, n_splits: int):
- _run_test_suite_no_split(data_tsv) if n_splits == 0 else _run_test_suite_multiple_splits(data_tsv, n_splits)
+ _run_test_suite_no_split(data_tsv) if n_splits == 0 else _run_test_suite_multiple_splits(data_tsv)
def _run_test_suite_no_split(data_tsv: Path):
@@ -535,18 +535,17 @@ def _run_test_suite_no_split(data_tsv: Path):
check_is_independent(train_baseline_tsv, test_baseline_tsv)
-def _run_test_suite_multiple_splits(data_tsv: Path, n_splits: int):
- for _ in range(n_splits):
- for folder, _, files in os.walk(data_tsv):
- folder = Path(folder)
- for file in files:
- if file[-3:] == "tsv":
- check_is_subject_unique(folder / file)
- train_baseline_tsv = folder / "train_baseline.tsv"
- test_baseline_tsv = folder / "validation_baseline.tsv"
- if train_baseline_tsv.exists():
- if test_baseline_tsv.exists():
- check_is_independent(train_baseline_tsv, test_baseline_tsv)
+def _run_test_suite_multiple_splits(data_tsv: Path):
+ for folder, _, files in os.walk(data_tsv):
+ folder = Path(folder)
+ for file in files:
+            if file.endswith(".tsv"):
+ check_is_subject_unique(folder / file)
+ train_baseline_tsv = folder / "train_baseline.tsv"
+ test_baseline_tsv = folder / "validation_baseline.tsv"
+ if train_baseline_tsv.exists():
+ if test_baseline_tsv.exists():
+ check_is_independent(train_baseline_tsv, test_baseline_tsv)
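The two checks called in this patch, `check_is_subject_unique` and `check_is_independent`, are defined earlier in `label_extraction.py` (only partially visible in these hunks). A minimal sketch of what such checks can look like, assuming baseline TSV files with a `participant_id` column (illustrative, not the notebook's exact implementation):

```python
import csv
from pathlib import Path

def _participants(path: Path) -> list:
    """Read the participant_id column of a tab-separated file."""
    with open(path, newline="") as f:
        return [row["participant_id"] for row in csv.DictReader(f, delimiter="\t")]

def check_is_subject_unique(labels_tsv: Path) -> None:
    """Raise if a participant appears more than once in a baseline TSV."""
    participants = _participants(labels_tsv)
    if len(participants) != len(set(participants)):
        raise ValueError(f"{labels_tsv} contains duplicated participants.")

def check_is_independent(train_tsv: Path, test_tsv: Path) -> None:
    """Raise if the same participant appears in both sets."""
    overlap = set(_participants(train_tsv)) & set(_participants(test_tsv))
    if overlap:
        raise ValueError(f"Participants present in both sets: {sorted(overlap)}")
```

Both functions raise on failure, which is why the patch's walker can simply call them and rely on an exception to signal leakage.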
From c4fa99c0e1317092de23a9ee0cf8b6264ee7b484 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Tue, 19 Mar 2024 14:38:16 +0100
Subject: [PATCH 11/24] add missing directory
---
src/generate.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/generate.py b/src/generate.py
index 652f9f3..f831290 100644
--- a/src/generate.py
+++ b/src/generate.py
@@ -98,7 +98,7 @@
# folder as the BIDS folder.
# %%
!mkdir data/fake_bids
-!clinicadl tsvtools get-labels data/fake_bids --missing_mods data/synthetic/missing_mods --merged_tsv data/synthetic/data.tsv --modality synthetic
+!clinicadl tsvtools get-labels data/fake_bids data --missing_mods data/synthetic/missing_mods --merged_tsv data/synthetic/data.tsv --modality synthetic
# %%
# Split train and test data
!clinicadl tsvtools split data/labels.tsv --n_test 0.25 --subset_name test
From 5e6cccc1adaefb9794c8f49c9718b991b6b31614 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Tue, 19 Mar 2024 14:48:10 +0100
Subject: [PATCH 12/24] typos
---
src/generate.py | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/generate.py b/src/generate.py
index f831290..de25b10 100644
--- a/src/generate.py
+++ b/src/generate.py
@@ -57,7 +57,7 @@
# ```{warning}
# You need to execute the `clinica run` and `clinicadl prepare-data` pipelines
# before running this task. Moreover, the trivial option can synthesize at
-# most $n$ images per label, where $n$ is the total number of images in the
+# most n images per label, where n is the total number of images in the
# input CAPS.
# ```
# ### Running the task
@@ -85,12 +85,12 @@
# In order to train a network, meta data must be organized in a file system
# generated by `clinicadl tsvtools`. For more information on the following
-# commands, please refer to the section ["Define your
-# population"](./label_extraction.ipynb).
+# commands, please refer to the section [Define your
+# population](./label_extraction.ipynb).
# %% [markdown]
-# #### Get the labels AD and CN.
-# This command needs a BIDS folder as an argument in order to create the
-# `missing_mods_directory` and the `merged.tsv` file, but if you already
+# #### Get the labels AD and CN
+# The `get-labels` command needs a BIDS folder as an argument in order to create the
+# `missing_mods` directory and the `merged_tsv` file, but if you already
# have these, you can give an empty folder as argument and provide the paths
# to the required files separately as keyword arguments.
@@ -103,7 +103,7 @@
# Split train and test data
!clinicadl tsvtools split data/labels.tsv --n_test 0.25 --subset_name test
# %%
-# Split train and validation data in a 5-fold cross-validation
+# Split train and validation data in a 3-fold cross-validation
!clinicadl tsvtools kfold data/split/train.tsv --n_splits 3
# %% [markdown]
# ## Train a model on synthetic data
From 1a2c9535f9717b702d79d150329e3e0a6c3da006 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Tue, 19 Mar 2024 14:49:34 +0100
Subject: [PATCH 13/24] remove outdated comment (output dir is now an argument)
---
src/generate.py | 2 --
1 file changed, 2 deletions(-)
diff --git a/src/generate.py b/src/generate.py
index de25b10..20706b9 100644
--- a/src/generate.py
+++ b/src/generate.py
@@ -94,8 +94,6 @@
# have these, you can give an empty folder as argument and provide the paths
# to the required files separately as keyword arguments.
-# Be careful, the output of the command (`labels.tsv`) is saved in the same
-# folder as the BIDS folder.
# %%
!mkdir data/fake_bids
!clinicadl tsvtools get-labels data/fake_bids data --missing_mods data/synthetic/missing_mods --merged_tsv data/synthetic/data.tsv --modality synthetic
From e6aadd50f53aea364404c67cff10a6fac558f371 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Tue, 19 Mar 2024 14:52:29 +0100
Subject: [PATCH 14/24] clarification on caps
---
src/generate.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/generate.py b/src/generate.py
index 20706b9..6f09144 100644
--- a/src/generate.py
+++ b/src/generate.py
@@ -67,7 +67,7 @@
# ```
# where:
-# - `caps_directory` is the output folder containing the results in a
+# - `caps_directory` is the output folder containing the results of `clinica run` in a
# [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy,
# - `output_directory` is the folder where the synthetic CAPS is stored,
# - `n_subjects` is the number of subjects per label in the synthetic dataset.
From b92cab3b9a54414099e1f2cb12badaceb2ca5d19 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Tue, 19 Mar 2024 14:57:03 +0100
Subject: [PATCH 15/24] remove mention of prepare-data (not used yet)
---
src/generate.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/src/generate.py b/src/generate.py
index 6f09144..fd30b76 100644
--- a/src/generate.py
+++ b/src/generate.py
@@ -55,10 +55,9 @@
#
# ```{warning}
-# You need to execute the `clinica run` and `clinicadl prepare-data` pipelines
-# before running this task. Moreover, the trivial option can synthesize at
-# most n images per label, where n is the total number of images in the
-# input CAPS.
+# You need to execute the `clinica run` pipeline before running this task.
+# Moreover, the trivial option can synthesize at most n images per label,
+# where n is the total number of images in the input CAPS.
# ```
# ### Running the task
#
From 42729134d7f08272dc4594239d52f4ef01b5e6f8 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Tue, 19 Mar 2024 17:10:35 +0100
Subject: [PATCH 16/24] clarify training section
---
src/generate.py | 28 ++++++++++++++++++----------
1 file changed, 18 insertions(+), 10 deletions(-)
diff --git a/src/generate.py b/src/generate.py
index fd30b76..515d924 100644
--- a/src/generate.py
+++ b/src/generate.py
@@ -105,26 +105,34 @@
# %% [markdown]
# ## Train a model on synthetic data
-# Once data was generated and split it is possible to train a model using
+# Once data was generated and split, it is possible to train a model using
# `clinicadl train` and evaluate its performance with `clinicadl interpret`. For
# more information on the following command lines please read the sections
# [Classification with a CNN on 2D slice](./training_classification.ipynb) and
# [Regression with 3D images](./training_regression.ipynb).
#
-# The following command uses a pre-build architecture of ClinicaDL `Conv4_FC3`.
+# The following `clinicadl train` command uses `Conv4_FC3`, a pre-built ClinicaDL architecture.
# You can also implement your own models by following the instructions of [this
# section](./training_custom.ipynb).
#
-# If you failed to generate a trivial dataset, please uncomment the next cell.
-# %%
-# !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data/synthetic.tar.gz -o synthetic.tar.gz
-# !tar xf synthetic.tar.gz
-# %%
-# Prepare data (extraction of image tensors)
+# First, we need to run `prepare-data` to extract the tensors from the images:
+# %%
!clinicadl prepare-data image data/synthetic t1-linear --extract_json extract_T1linear_image
+# %% [markdown]
+# Then, we will train the network with the synthetic data. If you failed to generate a trivial dataset,
+# please uncomment the next cell.
+# %%
+# # !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data/synthetic.tar.gz -o synthetic.tar.gz
+# # !mkdir data
+# # !tar xf synthetic.tar.gz -C data
+# # !mkdir data/fake_bids
+# # !clinicadl tsvtools get-labels data/fake_bids data --missing_mods data/synthetic/missing_mods --merged_tsv data/synthetic/data.tsv --modality synthetic
+# # !clinicadl tsvtools split data/labels.tsv --n_test 0.25 --subset_name test
+# # !clinicadl tsvtools kfold data/split/train.tsv --n_splits 3
+# # no need to run prepare-data
# %%
-# Train a network with synthetic data
-!clinicadl train classification data/synthetic extract_T1linear_image data/split/3_fold data/synthetic_maps --architecture Conv4_FC3 --n_splits 3 --split 0
+# Train a network with synthetic data (remove the --no-gpu option if you have access to a GPU)
+!clinicadl train classification data/synthetic extract_T1linear_image data/split/3_fold data/synthetic_maps --architecture Conv4_FC3 --n_splits 3 --split 0 --no-gpu
# %% [markdown]
# As the number of images is very small (4 per class), we do not rely on the
# accuracy to select the model. Instead we evaluate the model which obtained the
From bdf56da74bb269e0bbf3c6bfb717e9f415e3fbe7 Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Tue, 19 Mar 2024 17:33:20 +0100
Subject: [PATCH 17/24] reduce size of an image
---
src/generate.py | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/src/generate.py b/src/generate.py
index 515d924..6d1d26a 100644
--- a/src/generate.py
+++ b/src/generate.py
@@ -165,18 +165,19 @@
#
# ```{warning}
-# You need to execute the `clinica run` and `clinicadl prepare-data` pipelines
-# prior to running this task. Moreover, the random option can synthesize as
+# You need to execute the `clinica run` pipeline prior to running this task.
+# Moreover, the random option can synthesize as
# many images as wanted with only one input image.
# ```
-# %% [markdown]
-# ###Running the task
+# ### Running the task
+#
# ```bash
# clinicadl generate random
# ```
# where:
-# - `caps_directory` is the output folder containing the results in a [CAPS](http://www.clinica.run/doc/CAPS/) hierarchy.
+# - `caps_directory` is the output folder containing the results of `clinica run` in a
+# [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy,
# - `generated_caps_directory` is the folder where the synthetic CAPS is stored.
@@ -201,7 +202,7 @@
# - **subtype 1**: Top region has its maximum size but Bottom is atrophied,
# - **subtype 2**: Bottom region has its maximum size but Top is atrophied.
-#
+#
# These three subtypes are spread between two labels which mimic the binary
# classification between Alzheimer's disease patients (AD) with heterogeneous
From 88447b35a67663592d4df969251762fcc4b8ad8c Mon Sep 17 00:00:00 2001
From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com>
Date: Mon, 25 Mar 2024 14:38:33 +0100
Subject: [PATCH 18/24] typos and layout
---
src/training_classification.py | 80 ++++++++++++++++------------------
1 file changed, 38 insertions(+), 42 deletions(-)
diff --git a/src/training_classification.py b/src/training_classification.py
index 959366a..30a5d99 100644
--- a/src/training_classification.py
+++ b/src/training_classification.py
@@ -17,9 +17,9 @@
# !pip install clinicadl==1.3.0
# %% [markdown]
-# # Classification with a CNN on 2D slice.
+# # Classification with a CNN on 2D slice
#
-# The objective of the *classification* task is to attribute a class to input
+# The objective of the `classification` task is to attribute a class to input
# images. A CNN takes as input an image and outputs a vector of size `C`,
# corresponding to the number of different labels existing in the dataset. More
# precisely, this vector contains a value for each class that is often
@@ -28,7 +28,7 @@
# given image corresponds to the class with the highest probability in the
# output vector.
#
-# The cross-entropy loss between the ground truth and the network output is used
+# The `cross-entropy` loss between the ground truth and the network output is used
# to quantify the error made by the network during the training process, which
# becomes null if the network outputs 100% probability for the true class.
#
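The behaviour described above can be made concrete with a few lines of plain Python (an illustrative sketch, not the notebook's code): softmax turns the network's output vector of size `C` into class probabilities, and cross-entropy is the negative log-probability of the true class, which tends to zero as that probability tends to one.

```python
import math

def softmax(logits):
    """Convert a raw score vector of size C into probabilities summing to 1."""
    m = max(logits)  # subtract the max for numerical stability
    exps = [math.exp(x - m) for x in logits]
    total = sum(exps)
    return [e / total for e in exps]

def cross_entropy(logits, true_class):
    """Negative log-probability assigned to the ground-truth class."""
    return -math.log(softmax(logits)[true_class])
```

A confident correct prediction (large logit for the true class) yields a loss close to zero, while a confident wrong prediction yields a large loss.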
@@ -37,7 +37,7 @@
# as pooling, batch normalization, dropout and fully-connected layers are also
# used. The default CNN used for classification in ClinicaDL is `Conv5_FC3`
# which is a convolutional neural network with 5 convolution and 3
-# fully-connected layer but in this notebook we will use the `resnet18`:
+# fully-connected layers, but in this notebook we will use `resnet18`:
#