From 5e658b82d8b17375a65080da0d07b70e09e627a3 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Fri, 15 Mar 2024 18:06:45 +0100 Subject: [PATCH 01/24] typos and broken links --- src/preprocessing.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/src/preprocessing.py b/src/preprocessing.py index e5f9ed8..30a0bfa 100644 --- a/src/preprocessing.py +++ b/src/preprocessing.py @@ -25,7 +25,6 @@ # 1. **Organize** your neuroimaging data. # 2. **Preprocess** your neuroimaging data. # 3. Check the preprocessing **quality**. -# 4. **Prepare data** by extracting tensors from your preprocessed data. # %% [markdown] # ## Organization of neuroimaging data: the Brain Imaging Data Structure (BIDS) @@ -128,7 +127,6 @@ # and especially before training a neural network with that data. # - **Registration** helps to standardize the neuroimaging data so that it is # consistent across different subjects, scanners, and imaging modalities. This - # makes it easier for the deep neural network to learn patterns and make # accurate predictions. # - Preprocessing techniques such as **motion correction** and **noise @@ -171,17 +169,17 @@ # %% [markdown] # This notebook presents three possible preprocessing steps using the [Clinica](https://www.clinica.run/doc/) # software: -# - `t1-linear`: Affine registration of T1w images to the MNI standard space -# - `t1-volume`: Volume-based processing of T1-weighted MR images with SPM +# - `t1-linear`: Affine registration of T1w images to the MNI standard space, +# - `t1-volume`: Volume-based processing of T1w images with SPM, # - `pet-linear`: Spatial normalization to the MNI space and intensity -# normalization of PET images +# normalization of PET images. 
# %% [markdown] # # ## Image preprocessing with the `t1-linear` pipeline # For this tutorial, we propose a "minimal preprocessing" (as described in [(Wen # et al., 2020)](https://doi.org/10.1016/j.media.2020.101694)) implemented in -# the [`t1-linear` pipeline](http://www.clinica.run/doc/Pipelines/T1_Linear/) +# the [`t1-linear` pipeline](https://aramislab.paris.inria.fr/clinica/docs/public/latest/Pipelines/T1_Linear/) # using the [ANTs](http://stnava.github.io/ANTs/) software package [(Avants et # al., 2014)](https://doi.org/10.3389/fninf.2014.00044). This preprocessing # includes: @@ -212,9 +210,9 @@ # where: # - `bids_directory` is the input folder containing the dataset in a -# [BIDS](http://www.clinica.run/doc/BIDS/) hierarchy, +# [BIDS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/BIDS/) hierarchy, # - `caps_directory` is the output folder containing the results in a -# [CAPS](http://www.clinica.run/doc/CAPS/) hierarchy. +# [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy. # %% [markdown] # ```{note} @@ -236,7 +234,7 @@ # `_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz`. # %% [markdown] # (If you failed to obtain the preprocessing using the `t1-linear` pipeline, -# please uncomment the next cell) +# please uncomment the next cell). 
# %% # # !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/CAPS_example.tar.gz -o CAPS_example.tar.gz # # !tar xf CAPS_example.tar.gz @@ -298,9 +296,9 @@ # where: # - `bids_directory` is the input folder containing the dataset in a -# [BIDS](http://www.clinica.run/doc/BIDS/) hierarchy; +# [BIDS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/BIDS/) hierarchy; # - `caps_director` is the output folder containing the results in a -# [CAPS](http://www.clinica.run/doc/CAPS/) hierarchy; +# [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy; # - `acq_label` is the label given to the PET acquisition, specifying the tracer # used (trc-). It can be for instance '18FFDG' for # 18F-fluorodeoxyglucose or '18FAV45' for 18F-florbetapir; @@ -323,14 +321,8 @@ # ``` # %% [markdown] # ### Run the pipeline -# Start by downloading a dataset of PET images for 4 subjects from ADNI -# database. The dataset was converted to the BIDS specification using `clinica -# convert adni-to-bids`. - -# %%[markdown] # Please uncomment the next cells to download a dataset of pet images of 4 -# subjects from ADNI in a BIDS format (convert to BIDS with `clinica convert -# adni-to-bids`) +# subjects from ADNI in a BIDS format (convert to BIDS with `clinica convert adni-to-bids`). 
# %% !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_adni/BIDS_example.tar.gz -o adniBids.tar.gz @@ -403,11 +395,11 @@ # - `preprocessing` corresponds to the preprocessing pipeline whose outputs will # be checked (`t1-linear` or `pet-linear` or `t1-volume`), # - `caps_directory` is the folder containing the results of the preprocessing -# pipeline in a [CAPS](http://www.clinica.run/doc/CAPS/Introduction/) hierarchy, +# pipeline in a [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy, # - `output_path` is the path to the output TSV file (or directory for # `t1-volume`) containing QC results. -## +# %% [markdown] # ```{note} # Quality checks pipelines are all different and depend on the chosen # preprocessing. They should not be applied to other preprocessing procedures as From f0e9f8586ad0a0ffb9d1eb668ccf71e88f6c5b78 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Mon, 18 Mar 2024 13:25:48 +0100 Subject: [PATCH 02/24] more typos --- src/label_extraction.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/label_extraction.py b/src/label_extraction.py index 24a0043..a39f2fb 100644 --- a/src/label_extraction.py +++ b/src/label_extraction.py @@ -124,7 +124,7 @@ # of the sessions, for now. # # The whole preprocessing process has been run for you on these datasets. The -# results of the [quality check procedure](./preprocessing.ipynb#quality-check-of-your-preprocessed-data) have been used +# results of the [quality check procedure](./preprocessing.html#quality-check-of-your-preprocessed-data) have been used # to filter sessions. `data_oasis/oasis_after_qc.tsv` and `data_adni/adni_after_qc.tsv` # store the list of the sessions that have been accepted for each dataset. 
# @@ -136,13 +136,13 @@ #for OASIS-1 dataset !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/iotools_output.tar.gz -o iotools_output.tar.gz !tar xf iotools_output.tar.gz -!curl https://raw.githubusercontent.com/aramis-lab/clinicadl_handbook/main/data/oasis_after_qc.tsv -O data_oasis/oasis_after_qc.tsv +!curl https://raw.githubusercontent.com/aramis-lab/clinicadl_handbook/main/data/oasis_after_qc.tsv --output data_oasis/oasis_after_qc.tsv # %% #for the ADNI dataset !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_adni/iotools_output.tar.gz -o iotools_output.tar.gz !tar xf iotools_output.tar.gz -!curl https://raw.githubusercontent.com/aramis-lab/clinicadl_handbook/main/data/adni_after_qc.tsv -O data_adni/adni_after_qc.tsv +!curl https://raw.githubusercontent.com/aramis-lab/clinicadl_handbook/main/data/adni_after_qc.tsv --output data_adni/adni_after_qc.tsv # %% [markdown] # ### Get the labels @@ -151,12 +151,12 @@ # MCI) can be extracted with ClinicaDL using the command: # # ```bash -# clinicadl tsvtools get-labels bids_directory results_tsv +# clinicadl tsvtools get-labels # ``` # where: # - `bids_directory` the input folder containing the dataset in a BIDS # hierarchy. -# - `results_path` is the path to the tsv file. +# - `results_tsv` is the path to the tsv file. # ```{tip} # You can increase the verbosity of the command by adding -v flag(s). 
From 5d8bc0b49bb541435e521651a61f3d673229f2d8 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Mon, 18 Mar 2024 15:00:37 +0100 Subject: [PATCH 03/24] more typos and layout issues --- src/label_extraction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/label_extraction.py b/src/label_extraction.py index a39f2fb..585c956 100644 --- a/src/label_extraction.py +++ b/src/label_extraction.py @@ -318,8 +318,8 @@ def display_table(table_path): # clinicadl tsvtools get-progression [OPTIONS] DATA_TSV # ``` # with : -# - `DATA_TSV` (str) is the TSV file containing the data (output of clinicadl -# tsvtools get-labels|split|kfold). +# - `DATA_TSV` (str) is the TSV file containing the data (output of `clinicadl +# tsvtools get-labels|split|kfold`). # - `--time_horizon` (int) can be added: It is the time horizon in months that # is used to assess the stability of the MCI subjects. Default value: 36. @@ -376,7 +376,7 @@ def display_table(table_path): # ``` # where: # - `data_tsv` is the TSV file with the data that are going to be split -# (output of `clinicadl tsvtools getlabels|split|kfold`). +# (output of `clinicadl tsvtools get-labels|split|kfold`). # # Each diagnosis label is split independently. 
Random splits are generated # until the differences between age and sex distributions between the test From bcc244fe43ea79e39f058a311437f688505012a2 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Mon, 18 Mar 2024 15:01:00 +0100 Subject: [PATCH 04/24] reduce the size of the test set --- src/label_extraction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/label_extraction.py b/src/label_extraction.py index 585c956..a4a8c23 100644 --- a/src/label_extraction.py +++ b/src/label_extraction.py @@ -392,7 +392,7 @@ def display_table(table_path): # Let's create a test set including 20 subjects: # %% -!clinicadl tsvtools split data_oasis/labels.tsv --n_test 20 --subset_name test +!clinicadl tsvtools split data_oasis/labels.tsv --n_test 0.2 --subset_name test # %% # for Adni dataset From ff06f79b362daf888246abe3235d914ef65e6892 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Mon, 18 Mar 2024 15:14:31 +0100 Subject: [PATCH 05/24] typos --- src/label_extraction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/label_extraction.py b/src/label_extraction.py index a4a8c23..dc34c7c 100644 --- a/src/label_extraction.py +++ b/src/label_extraction.py @@ -437,7 +437,7 @@ def display_table(table_path): # clinicadl tsvtool kfold # ``` # -# where `formatted_data_path` is the output tsv file of `clinicadl tsvtool getlabels|split|kfold`. +# where `formatted_data_path` is the output tsv file of `clinicadl tsvtool get-labels|split|kfold`. 
# In a similar way as for the test split, three tsv files are written # **per split** for each set: From b03a884845ceb7f19ae17d0dfaadd714ee8dcd3b Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Mon, 18 Mar 2024 15:17:13 +0100 Subject: [PATCH 06/24] correction on the number of splits for kfold cross validation --- src/label_extraction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/label_extraction.py b/src/label_extraction.py index dc34c7c..78dead6 100644 --- a/src/label_extraction.py +++ b/src/label_extraction.py @@ -451,11 +451,11 @@ def display_table(table_path): # across the results of the 5 folds already reduces bias compared to a single # data split. # %% -!clinicadl tsvtools kfold data_oasis/split/train.tsv --n_splits 4 --subset_name validation +!clinicadl tsvtools kfold data_oasis/split/train.tsv --n_splits 5 --subset_name validation # %% # for ADNI dataset -!clinicadl tsvtools kfold data_adni/split/train.tsv --n_splits 4 --subset_name validation +!clinicadl tsvtools kfold data_adni/split/train.tsv --n_splits 5 --subset_name validation # %% [markdown] # ### Check the absence of data leakage # From 8d5353414d1286acea99ae8c92b523a589efb889 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Mon, 18 Mar 2024 15:27:08 +0100 Subject: [PATCH 07/24] correction of bug in data leakage check --- src/label_extraction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/label_extraction.py b/src/label_extraction.py index 78dead6..e478871 100644 --- a/src/label_extraction.py +++ b/src/label_extraction.py @@ -520,7 +520,7 @@ def check_is_independent(train_path_baseline: Path, test_path_baseline: Path): def run_test_suite(data_tsv: Path, n_splits: int): - _run_test_suite_no_split(data_tsv) if n_splits == 0 else _run_test_suite_multiple_splits(data_tsv) + _run_test_suite_no_split(data_tsv) if n_splits == 0 else 
_run_test_suite_multiple_splits(data_tsv, n_splits) def _run_test_suite_no_split(data_tsv: Path): @@ -535,7 +535,7 @@ def _run_test_suite_no_split(data_tsv: Path): check_is_independent(train_baseline_tsv, test_baseline_tsv) -def _run_test_suite_multiple_splits(data_tsv: Path): +def _run_test_suite_multiple_splits(data_tsv: Path, n_splits: int): for _ in range(n_splits): for folder, _, files in os.walk(data_tsv): folder = Path(folder) @@ -554,7 +554,7 @@ def _run_test_suite_multiple_splits(data_tsv: Path): run_test_suite(Path("./data_oasis/split"), n_splits=0) # Run check for train / validation splits -run_test_suite(Path("./data_oasis/split/4_fold"), n_splits=4) +run_test_suite(Path("./data_oasis/split/5_fold"), n_splits=5) # %% [markdown] # If no Error was raised then none of the three conditions was broken. It is now # possible to use the train and the validation sets to perform a classification From 2d2e85043ef89045f5e343e0595a9d34fbc3e390 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Mon, 18 Mar 2024 15:29:11 +0100 Subject: [PATCH 08/24] typos --- src/label_extraction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/label_extraction.py b/src/label_extraction.py index e478871..a44e7e2 100644 --- a/src/label_extraction.py +++ b/src/label_extraction.py @@ -556,7 +556,7 @@ def _run_test_suite_multiple_splits(data_tsv: Path, n_splits: int): # Run check for train / validation splits run_test_suite(Path("./data_oasis/split/5_fold"), n_splits=5) # %% [markdown] -# If no Error was raised then none of the three conditions was broken. It is now +# If no Error was raised, then none of the three conditions was broken. It is now # possible to use the train and the validation sets to perform a classification # task, and then to evaluate correctly the performance of the classifier on the # test set. 
@@ -572,6 +572,6 @@ def _run_test_suite_multiple_splits(data_tsv: Path, n_splits: int): # # %% [markdown] -# Now that you have your train, test and validation split, you can train a +# Now that you have your train, test and validation splits, you can train a # network for classification, regression or reconstruction with clinicaDL. # %% From 26f84cbb79a4f01cb99503e073baa5c5c99e877b Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 19 Mar 2024 09:41:55 +0100 Subject: [PATCH 09/24] typos --- src/generate.py | 20 ++++++++++---------- src/label_extraction.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/generate.py b/src/generate.py index 3fcfe84..652f9f3 100644 --- a/src/generate.py +++ b/src/generate.py @@ -20,22 +20,22 @@ # # Generate a synthetic dataset # -# When looking for new networks architecture to improve the performance of the -# deep learning tasks implies to tests different sets of hyperparameters. This -# takes a lot of time and frequently we finish with networks that don't +# Looking for new network architectures to improve performance on a +# deep learning task implies testing different sets of hyperparameters. This +# takes a lot of time and we often end up with networks that don't # converge. To avoid this pitfall, it is often advised to simplify the problem: -# focus on a subset of data / classification task that is more tractable than +# focus on a subset of data or a task that is more tractable than # the one that is currently explored. This is the purpose of `clinicadl # generate` which creates synthetic, tractable data from real data to # check that developed networks are working on this simple case before going # further. # -# With Clinicadl, you can generate three types of synthetic data sets for a -# binary classification depending on the option chosen: trivial, random or -# shepplogan. 
+# With ClinicaDL, you can generate three types of synthetic data sets for a +# binary classification task depending on the option chosen: `trivial`, `random` or +# `shepplogan`. # # If you ran the previous notebook, you must have a folder called -# `CAPS_example` in the data_oasis directory (otherwise uncomment the next cell +# `CAPS_example` in the `data_oasis` directory (otherwise uncomment the next cell # to download a local version of the necessary folders). # %% # !curl -k https://aramislab.paris.inria.fr/clinicadl/files/data/handbook_2023/data_oasis/CAPS_example.tar.gz -o oasisCaps.tar.gz @@ -68,8 +68,8 @@ # where: # - `caps_directory` is the output folder containing the results in a -# [CAPS](http://www.clinica.run/doc/CAPS/) hierarchy. -# - `output_directory` is the folder where the synthetic CAPS is stored. +# [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy, +# - `output_directory` is the folder where the synthetic CAPS is stored, # - `n_subjects` is the number of subjects per label in the synthetic dataset. # Default value: 300. diff --git a/src/label_extraction.py b/src/label_extraction.py index a44e7e2..083b948 100644 --- a/src/label_extraction.py +++ b/src/label_extraction.py @@ -573,5 +573,5 @@ def _run_test_suite_multiple_splits(data_tsv: Path, n_splits: int): # %% [markdown] # Now that you have your train, test and validation splits, you can train a -# network for classification, regression or reconstruction with clinicaDL. +# network for classification, regression or reconstruction with ClinicaDL. 
# %% From b25135062d3ab707a68a3293cb19562001b31fea Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 19 Mar 2024 09:46:53 +0100 Subject: [PATCH 10/24] remove superfluous line in data leakage check --- src/label_extraction.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/label_extraction.py b/src/label_extraction.py index 083b948..e07a010 100644 --- a/src/label_extraction.py +++ b/src/label_extraction.py @@ -390,7 +390,7 @@ def display_table(table_path): # In OASIS there is no longitudinal follow-up, hence the last two TSV files are # identical. -# Let's create a test set including 20 subjects: +# Let's create a test set including 20% of the subjects: # %% !clinicadl tsvtools split data_oasis/labels.tsv --n_test 0.2 --subset_name test @@ -520,7 +520,7 @@ def check_is_independent(train_path_baseline: Path, test_path_baseline: Path): def run_test_suite(data_tsv: Path, n_splits: int): - _run_test_suite_no_split(data_tsv) if n_splits == 0 else _run_test_suite_multiple_splits(data_tsv, n_splits) + _run_test_suite_no_split(data_tsv) if n_splits == 0 else _run_test_suite_multiple_splits(data_tsv) def _run_test_suite_no_split(data_tsv: Path): @@ -535,18 +535,17 @@ def _run_test_suite_no_split(data_tsv: Path): check_is_independent(train_baseline_tsv, test_baseline_tsv) -def _run_test_suite_multiple_splits(data_tsv: Path, n_splits: int): - for _ in range(n_splits): - for folder, _, files in os.walk(data_tsv): - folder = Path(folder) - for file in files: - if file[-3:] == "tsv": - check_is_subject_unique(folder / file) - train_baseline_tsv = folder / "train_baseline.tsv" - test_baseline_tsv = folder / "validation_baseline.tsv" - if train_baseline_tsv.exists(): - if test_baseline_tsv.exists(): - check_is_independent(train_baseline_tsv, test_baseline_tsv) +def _run_test_suite_multiple_splits(data_tsv: Path): + for folder, _, files in os.walk(data_tsv): + folder = 
Path(folder) + for file in files: + if file[-3:] == "tsv": + check_is_subject_unique(folder / file) + train_baseline_tsv = folder / "train_baseline.tsv" + test_baseline_tsv = folder / "validation_baseline.tsv" + if train_baseline_tsv.exists(): + if test_baseline_tsv.exists(): + check_is_independent(train_baseline_tsv, test_baseline_tsv) From c4fa99c0e1317092de23a9ee0cf8b6264ee7b484 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 19 Mar 2024 14:38:16 +0100 Subject: [PATCH 11/24] add missing directory --- src/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/generate.py b/src/generate.py index 652f9f3..f831290 100644 --- a/src/generate.py +++ b/src/generate.py @@ -98,7 +98,7 @@ # folder as the BIDS folder. # %% !mkdir data/fake_bids -!clinicadl tsvtools get-labels data/fake_bids --missing_mods data/synthetic/missing_mods --merged_tsv data/synthetic/data.tsv --modality synthetic +!clinicadl tsvtools get-labels data/fake_bids data --missing_mods data/synthetic/missing_mods --merged_tsv data/synthetic/data.tsv --modality synthetic # %% # Split train and test data !clinicadl tsvtools split data/labels.tsv --n_test 0.25 --subset_name test From 5e6cccc1adaefb9794c8f49c9718b991b6b31614 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 19 Mar 2024 14:48:10 +0100 Subject: [PATCH 12/24] typos --- src/generate.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/generate.py b/src/generate.py index f831290..de25b10 100644 --- a/src/generate.py +++ b/src/generate.py @@ -57,7 +57,7 @@ # ```{warning} # You need to execute the `clinica run` and `clinicadl prepare-data` pipelines # before running this task. 
Moreover, the trivial option can synthesize at -# most $n$ images per label, where $n$ is the total number of images in the +# most n images per label, where n is the total number of images in the # input CAPS. # ``` # ### Running the task @@ -85,12 +85,12 @@ # In order to train a network, meta data must be organized in a file system # generated by `clinicadl tsvtools`. For more information on the following -# commands, please refer to the section ["Define your -# population"](./label_extraction.ipynb). +# commands, please refer to the section [Define your +# population](./label_extraction.ipynb). # %% [markdown] -# #### Get the labels AD and CN. -# This command needs a BIDS folder as an argument in order to create the -# `missing_mods_directory` and the `merged.tsv` file, but if you already +# #### Get the labels AD and CN +# `get-labels` command needs a BIDS folder as an argument in order to create the +# `missing_mods` directory and the `merged_tsv` file, but if you already # have these, you can give an empty folder as argument and provide the paths # to the required files separately as keyword arguments. 
@@ -103,7 +103,7 @@ # Split train and test data !clinicadl tsvtools split data/labels.tsv --n_test 0.25 --subset_name test # %% -# Split train and validation data in a 5-fold cross-validation +# Split train and validation data in a 3-fold cross-validation !clinicadl tsvtools kfold data/split/train.tsv --n_splits 3 # %% [markdown] # ## Train a model on synthetic data From 1a2c9535f9717b702d79d150329e3e0a6c3da006 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 19 Mar 2024 14:49:34 +0100 Subject: [PATCH 13/24] remove outdated comment (output dir is now an argument) --- src/generate.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/generate.py b/src/generate.py index de25b10..20706b9 100644 --- a/src/generate.py +++ b/src/generate.py @@ -94,8 +94,6 @@ # have these, you can give an empty folder as argument and provide the paths # to the required files separately as keyword arguments. -# Be careful, the output of the command (`labels.tsv`) is saved in the same -# folder as the BIDS folder. 
# %% !mkdir data/fake_bids !clinicadl tsvtools get-labels data/fake_bids data --missing_mods data/synthetic/missing_mods --merged_tsv data/synthetic/data.tsv --modality synthetic From e6aadd50f53aea364404c67cff10a6fac558f371 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 19 Mar 2024 14:52:29 +0100 Subject: [PATCH 14/24] clarification on caps --- src/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/generate.py b/src/generate.py index 20706b9..6f09144 100644 --- a/src/generate.py +++ b/src/generate.py @@ -67,7 +67,7 @@ # ``` # where: -# - `caps_directory` is the output folder containing the results in a +# - `caps_directory` is the output folder containing the results of `clinica run` in a # [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy, # - `output_directory` is the folder where the synthetic CAPS is stored, # - `n_subjects` is the number of subjects per label in the synthetic dataset. From b92cab3b9a54414099e1f2cb12badaceb2ca5d19 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 19 Mar 2024 14:57:03 +0100 Subject: [PATCH 15/24] remove mention of prepare-data (not used yet) --- src/generate.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/generate.py b/src/generate.py index 6f09144..fd30b76 100644 --- a/src/generate.py +++ b/src/generate.py @@ -55,10 +55,9 @@ # generate trivial # ```{warning} -# You need to execute the `clinica run` and `clinicadl prepare-data` pipelines -# before running this task. Moreover, the trivial option can synthesize at -# most n images per label, where n is the total number of images in the -# input CAPS. +# You need to execute the `clinica run` pipeline before running this task. +# Moreover, the trivial option can synthesize at most n images per label, +# where n is the total number of images in the input CAPS. 
# ``` # ### Running the task # From 42729134d7f08272dc4594239d52f4ef01b5e6f8 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 19 Mar 2024 17:10:35 +0100 Subject: [PATCH 16/24] clarify training section --- src/generate.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/generate.py b/src/generate.py index fd30b76..515d924 100644 --- a/src/generate.py +++ b/src/generate.py @@ -105,26 +105,34 @@ # %% [markdown] # ## Train a model on synthetic data -# Once data was generated and split it is possible to train a model using +# Once data was generated and split, it is possible to train a model using # `clinicadl train` and evaluate its performance with `clinicadl interpret`. For # more information on the following command lines please read the sections # [Classification with a CNN on 2D slice](./training_classification.ipynb) and # [Regression with 3D images](./training_regression.ipynb). # -# The following command uses a pre-build architecture of ClinicaDL `Conv4_FC3`. +# The following `clinicadl train` command uses a pre-build architecture of ClinicaDL `Conv4_FC3`. # You can also implement your own models by following the instructions of [this # section](./training_custom.ipynb). # -# If you failed to generate a trivial dataset, please uncomment the next cell. -# %% -# !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data/synthetic.tar.gz -o synthetic.tar.gz -# !tar xf synthetic.tar.gz -# %% -# Prepare data (extraction of image tensors) +# First, we need to run `prepare-data` to extract the tensors from the images: +# %% !clinicadl prepare-data image data/synthetic t1-linear --extract_json extract_T1linear_image +# %% [markdown] +# Then, we will train the network with the synthetic data. If you failed to generate a trivial dataset, +# please uncomment the next cell. 
+# %% +# # !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data/synthetic.tar.gz -o synthetic.tar.gz +# # !mkdir data +# # !tar xf synthetic.tar.gz -C data +# # !mkdir data/fake_bids +# # !clinicadl tsvtools get-labels data/fake_bids data --missing_mods data/synthetic/missing_mods --merged_tsv data/synthetic/data.tsv --modality synthetic +# # !clinicadl tsvtools split data/labels.tsv --n_test 0.25 --subset_name test +# # !clinicadl tsvtools kfold data/split/train.tsv --n_splits 3 +# # no need to run prepare-data # %% -# Train a network with synthetic data -!clinicadl train classification data/synthetic extract_T1linear_image data/split/3_fold data/synthetic_maps --architecture Conv4_FC3 --n_splits 3 --split 0 +# Train a network with synthetic data (remove --no-gpu option if you do have access to a gpu) +!clinicadl train classification data/synthetic extract_T1linear_image data/split/3_fold data/synthetic_maps --architecture Conv4_FC3 --n_splits 3 --split 0 --no-gpu # %% [markdown] # As the number of images is very small (4 per class), we do not rely on the # accuracy to select the model. Instead we evaluate the model which obtained the From bdf56da74bb269e0bbf3c6bfb717e9f415e3fbe7 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 19 Mar 2024 17:33:20 +0100 Subject: [PATCH 17/24] reduce size of an image --- src/generate.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/generate.py b/src/generate.py index 515d924..6d1d26a 100644 --- a/src/generate.py +++ b/src/generate.py @@ -165,18 +165,19 @@ # generate random # ```{warning} -# You need to execute the `clinica run` and `clinicadl prepare-data` pipelines -# prior to running this task. Moreover, the random option can synthesize as +# You need to execute the `clinica run` pipeline prior to running this task. +# Moreover, the random option can synthesize as # many images as wanted with only one input image. 
# ``` -# %% [markdown] -# ###Running the task +# ### Running the task +# # ```bash # clinicadl generate random # ``` # where: -# - `caps_directory` is the output folder containing the results in a [CAPS](http://www.clinica.run/doc/CAPS/) hierarchy. +# - `caps_directory` is the output folder containing the results of `clinica run` in a +# [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) hierarchy, # - `generated_caps_directory` is the folder where the synthetic CAPS is stored. @@ -201,7 +202,7 @@ # - **subtype 1**: Top region has its maximum size but Bottom is atrophied, # - **subtype 2**: Bottom region has its maximum size but Top is atrophied. -# generate shepplogan +# generate shepplogan # These three subtypes are spread between two labels which mimic the binary # classification between Alzheimer's disease patients (AD) with heterogeneous From 88447b35a67663592d4df969251762fcc4b8ad8c Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Mon, 25 Mar 2024 14:38:33 +0100 Subject: [PATCH 18/24] typos and layout --- src/training_classification.py | 80 ++++++++++++++++------------------ 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/src/training_classification.py b/src/training_classification.py index 959366a..30a5d99 100644 --- a/src/training_classification.py +++ b/src/training_classification.py @@ -17,9 +17,9 @@ # !pip install clinicadl==1.3.0 # %% [markdown] -# # Classification with a CNN on 2D slice. +# # Classification with a CNN on 2D slice # -# The objective of the *classification* task is to attribute a class to input +# The objective of the `classification` task is to attribute a class to input # images. A CNN takes as input an image and outputs a vector of size `C`, # corresponding to the number of different labels existing in the dataset. 
More # precisely, this vector contains a value for each class that is often @@ -28,7 +28,7 @@ # given image corresponds to the class with the highest probability in the # output vector. # -# The cross-entropy loss between the ground truth and the network output is used +# The `cross-entropy` loss between the ground truth and the network output is used # to quantify the error made by the network during the training process, which # becomes null if the network outputs 100% probability for the true class. # @@ -37,7 +37,7 @@ # as pooling, batch normalization, dropout and fully-connected layers are also # used. The default CNN used for classification in ClinicaDL is `Conv5_FC3` # which is a convolutional neural network with 5 convolution and 3 -# fully-connected layer but in this notebook we will use the `resnet18`: +# fully-connected layer, but in this notebook we will use the `resnet18`: #
# resnet18 architecture @@ -61,19 +61,17 @@ # for each patch. # %% [markdown] -# Here, as you will use slice-level, you simply need to type the following -# command line: +# You need to run the following command line: # ```bash # clinicadl prepare-data {image/patch/roi/slice} # ``` # where: -# - `caps_directory` is the folder containing the results of the [`t1-linear` -# pipeline](#preprocessing:t1-linear) and the output of the present command, -# both in a CAPS hierarchy. +# - `caps_directory` is the folder in a CAPS hierarchy containing the images +# corresponding to the `modality` asked, # - `modality` is the name of the preprocessing performed on the original -# images. It can be `t1-linear` or `pet-linear`. You can choose custom if you +# images (e.g. `t1-linear`). You can choose custom if you # want to get a tensor from a custom filename. # # When using patch or slice extraction, default values were set according to @@ -87,29 +85,29 @@ # deeplearning_prepare_data # ├── image_based # │ └── t1_linear -# │ └── sub-_ses-_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.pt +# │ └── sub-_ses-_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.pt # ├── slice_based # │ └── t1_linear -# │ ├── sub-_ses-_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_slice-0_T1w.pt -# │ ├── sub-_ses-_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_slice-1_T1w.pt +# │ ├── sub-_ses-_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_slice-0_T1w.pt +# │ ├── sub-_ses-_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_slice-1_T1w.pt # │ ├── ... 
-# │ └── sub-_ses-_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_slice-N_T1w.pt +# │ └── sub-_ses-_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_slice-N_T1w.pt # ├── patch_based # │ └── pet-linear -# │ ├── sub-_ses-_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_patch-0_T1w.pt -# │ ├── sub-_ses-_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_patch-1_T1w.pt +# │ ├── sub-_ses-_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_patch-0_T1w.pt +# │ ├── sub-_ses-_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_patch-1_T1w.pt # │ ├── ... -# │ └── sub-_ses-_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_patch-N_T1w.pt +# │ └── sub-_ses-_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_axis-axi_channel-rgb_patch-N_T1w.pt # └── roi_based # └── t1_linear -# └── sub-_ses-_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.pt +# └── sub-_ses-_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.pt # ``` # %% [markdown] # In short, there is a folder for each feature (**image**, **slice**, **roi** or **patch**) # and inside the numbered tensor files with the corresponding feature. -# Files are saved with the .pt extension and contains tensors in PyTorch format. -# A JSON file is also stored in the CAPS hierarchy under the tensor_extraction +# Files are saved with the **.pt** extension and contains tensors in PyTorch format. +# A JSON file is also stored in the CAPS hierarchy under the `tensor_extraction` # folder: # ```text @@ -117,15 +115,15 @@ # └── tensor_extraction # └── .json #``` -# These files are compulsory to run the train command. They provide all the -# details of the processing performed by the prepare-data command that will be +# This file is compulsory to run the train command. It provides all the +# details of the processing performed by the `prepare-data` command that will be # necessary when reading the tensors. 
# %% [markdown] # ```{warning} -# The default behavior of the pipeline is to only extract images even if another +# The default behavior of the pipeline is to only extract images, even if another # extraction method is specified. However, all the options will be saved in the -# preprocessing JSON file and then the extraction is done when data is loaded +# preprocessing JSON file and then, the extraction is done when data is loaded # during the training. If you want to save the extracted method tensors in the # CAPS, you have to add the `--save-features` flag. # ``` @@ -144,14 +142,14 @@ # %% [markdown] # ## Before starting # If you failed to obtain the preprocessing using the `t1-linear` pipeline, -# please uncomment the next cell. You can extract tensors from this CAPS but +# please uncomment the next cell. You can extract tensors from this CAPS, but # for the training part you will need a bigger dataset. # %% -# !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/CAPS_example_prepared.tar.gz -o oasisCaps.tar.gz +# !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/CAPS_example.tar.gz -o oasisCaps.tar.gz # !tar xf oasisCaps.tar.gz # %% [markdown] -# If you have already download the full dataset and converted it to +# If you have already downloaded the full dataset and converted it to # CAPS, you can give the path to the dataset directory by changing # the CAPS path. If not, just run it as written but the results will # not be relevant. @@ -180,10 +178,9 @@ # ``` # If you already know the models implemented in `clinicadl`, you can directly -# jump to the `train custom` to implement your own custom experiment! +# jump to [this section](./training_custom.ipynb) to implement your own custom experiment! # %% -from pyrsistent import v import torch # Check if a GPU is available @@ -191,26 +188,25 @@ # %% [markdown] -# ### Data used for training. 
+# ### Data used for training # # Because they are time-costly, the preprocessing steps presented in the # beginning of this tutorial were only executed on a subset of OASIS-1, but # obviously two participants are insufficient to train a network! To obtain more # meaningful results, you should retrieve the whole OASIS-1 dataset and run the training -# based on the labels and splits performed in the previous section. Of course, -# you can use another dataset, but then you will have to perform again -# "./label_extraction.ipynb" the extraction of labels and data splits on this -# dataset. +# based on the labels and splits obtained in the [previous section](./label_extraction.ipynb). +# Of course, you can use another dataset, on which you will also have to perform +# labels extraction and data splitting. # %% [markdown] -# ## `train CLASSIFICATION` +# ## `train classification` # This functionality mainly relies on the PyTorch deep learning library # [[Paszke et al., 2019](https://papers.nips.cc/paper/9015-pytorch-an-imperative-style-high-performance-deep-learning-library)]. # # Different tasks can be learnt by a network: `classification`, `reconstruction` -# and `regression`, in this notebook, we focus on the `classification` task. +# and `regression`. In this notebook, we focus on the `classification` task. # %% [markdown] @@ -223,7 +219,7 @@ # and used in a transfer learning fashion. Other advantages are the increased # number of training samples as many slices can be extracted from a single 3D # image, and a lower memory usage compared to using the full MR image as -# input.This paradigm can be divided into two different frameworks: +# input. This paradigm can be divided into two different frameworks: # - **single-CNN**: one CNN is trained on all slice locations. # - **multi-CNN**: one CNN is trained per slice location. @@ -232,7 +228,7 @@ # framework), however the CNNs may be more accurate as they are specialized for # one slice location. 
# -# During training, the gradients update are done based on the loss computed at +# During training, gradient updates are done based on the loss computed at # the slice level. Final performance metric are computed at the subject level by # combining the outputs of the slices of the same subject. # %% [markdown] @@ -260,7 +256,7 @@ # This will be used to load the correct tensor inputs with the wanted # preprocessing. # - `TSV_DIRECTORY` (Path) is the input folder of a TSV file tree generated by -# `clinicadl tsvtool {split|kfold}`. +# `clinicadl tsvtools {split|kfold}`. # In case of multi-cohort training, must be a path to a TSV file. # - `OUTPUT_MAPS_DIRECTORY` (Path) is the folder where the results are stored. # @@ -288,7 +284,7 @@ # soft-voting. It is only taken into account if several images are extracted # from the same original 3D image (i.e. `num_networks` > 1). Default: `0`. # - `--loss` (str) is the name of the loss used to optimize the classification -# task. Must correspond to a Pytorch class. Default: `CrossEntropyLoss`. +# task. Must correspond to a PyTorch class. Default: `CrossEntropyLoss`. # %% [markdown] # ```{note} @@ -320,14 +316,14 @@ !clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_multi --n_splits 4 --architecture resnet18 --multi_network # %% [markdown] -# The clinicadl train command outputs a MAPS structure in which there are only +# The `clinicadl train command outputs` a MAPS structure in which there are only # two data groups: train and validation. # A MAPS folder contains all the elements obtained during the training and other # post-processing procedures applied to a particular deep learning framework. # The hierarchy is organized according to the fold, selection metric and data # group used. 
-# An example of a MAPS structure is given below +# An example of a MAPS structure is given below: #```text # # ├── environment.txt From 40153942f4f2562b504129eb112e835a214c5221 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Mon, 25 Mar 2024 14:45:53 +0100 Subject: [PATCH 19/24] minor change --- src/training_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/training_classification.py b/src/training_classification.py index 30a5d99..428bf71 100644 --- a/src/training_classification.py +++ b/src/training_classification.py @@ -316,7 +316,7 @@ !clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_multi --n_splits 4 --architecture resnet18 --multi_network # %% [markdown] -# The `clinicadl train command outputs` a MAPS structure in which there are only +# The `clinicadl train` command outputs a MAPS structure in which there are only # two data groups: train and validation. # A MAPS folder contains all the elements obtained during the training and other # post-processing procedures applied to a particular deep learning framework. From 89a1ba1ab74b1cbb7df8f0e29c1936bb204a67e0 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 26 Mar 2024 10:18:47 +0100 Subject: [PATCH 20/24] correction on wrong CAPS folder --- src/training_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/training_classification.py b/src/training_classification.py index 428bf71..1338c50 100644 --- a/src/training_classification.py +++ b/src/training_classification.py @@ -164,7 +164,7 @@ # next cell. 
# %% -# !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/CAPS_extracted.tar.gz -o oasisCaps.tar.gz +# !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/CAPS_example_prepared.tar.gz -o oasisCaps.tar.gz # !tar xf oasisCaps.tar.gz # %% !tree -L 3 data_oasis/CAPS_example/subjects/sub-OASIS10*/ses-M000/deeplearning_prepare_data/ From 729ef093335561546932405c808dc18ca898b722 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 26 Mar 2024 17:24:14 +0100 Subject: [PATCH 21/24] add new training dataset --- src/training_classification.py | 86 +++++++++++++++++----------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/src/training_classification.py b/src/training_classification.py index 1338c50..56e99ce 100644 --- a/src/training_classification.py +++ b/src/training_classification.py @@ -142,17 +142,11 @@ # %% [markdown] # ## Before starting # If you failed to obtain the preprocessing using the `t1-linear` pipeline, -# please uncomment the next cell. You can extract tensors from this CAPS, but -# for the training part you will need a bigger dataset. +# please uncomment the next cell. # %% # !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/CAPS_example.tar.gz -o oasisCaps.tar.gz # !tar xf oasisCaps.tar.gz -# %% [markdown] -# If you have already downloaded the full dataset and converted it to -# CAPS, you can give the path to the dataset directory by changing -# the CAPS path. If not, just run it as written but the results will -# not be relevant. # %% [markdown] # To perform the feature extraction for our dataset, run the following cell: # %% @@ -198,6 +192,17 @@ # based on the labels and splits obtained in the [previous section](./label_extraction.ipynb). # Of course, you can use another dataset, on which you will also have to perform # labels extraction and data splitting. 
+# +# The purpose of this notebook is not to fully train a network, but rather to understand +# how ClinicaDL works. Therefore, we will keep working with a subset of OASIS-1. This new +# subset contains 10 T1w images, pre-processed with the pipeline `t1-linear`of Clinia. The +# `prepare-data` pipeline has already been performed on the dataset. +# +# You can remove your old `data_oasis` folder and download the new one: + +# %% +# !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/CAPS_example_train.tar.gz -o oasisCaps.tar.gz +# !tar xf oasisCaps.tar.gz # %% [markdown] # ## `train classification` @@ -234,9 +239,9 @@ # %% [markdown] # ### Prerequisites # -# You need to execute `clinicadl tsvtools get-labels` and `clinicadl tsvtools -# {split|kfold}` commands prior to running this task to have the correct TSV file -# organization. Moreover, there should be a CAPS, obtained running the +# If you use your own dataset, you need to execute `clinicadl tsvtools get-labels` +# and `clinicadl tsvtools {split|kfold}` commands prior to running this task to have +# the correct TSV file organization. Moreover, there should be a CAPS, obtained running the # preprocessing pipeline wanted. # %% [markdown] # ### Running the task @@ -252,7 +257,7 @@ # [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) # hierarchy. In case of multi-cohort training, must be a path to a TSV file. # - `PREPROCESSING_JSON` (str) is the name of the preprocessing json file stored -# in the `CAPS_DIRECTORY` that corresponds to the `clinicadl extract` output. +# in the `CAPS_DIRECTORY` that corresponds to the `clinicadl prepare-data` output. # This will be used to load the correct tensor inputs with the wanted # preprocessing. 
# - `TSV_DIRECTORY` (Path) is the input folder of a TSV file tree generated by
@@ -299,21 +304,26 @@
# The default label for the classification task is `diagnosis` but as long as it
# is a categorical variable, it can be of any type.
# %% [markdown]
-# The next cell train a `resnet18` to classify 2D slices of t1-linear MRI by
+# The next cells train `resnet18` networks to classify 2D slices of t1-linear MRI by
# diagnosis (AD or CN).
-# Please note that the purpose of this notebook is not to fully train a network
-# because we don't have enough data. The objective is to understand how ClinicaDL
-# works and make inferences using pretrained models in the next section.
+# Please note once again that we don't expect any interesting results with a
+# network trained on only 10 MRI images. That's why we will train the networks for
+# only a few epochs.
+#
+# Let's first train a **single-CNN** on all slice locations (actually 4 networks are
+# trained, one for each split):

# %%
# 2D-slice single-CNN training
-#!clinicadl train classification -h
-!clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_resnet18 --n_splits 4 --architecture resnet18
-
+!clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_resnet18 --n_splits 4 --architecture resnet18 --batch_size 8 --epochs 5
+# %% [markdown]
+# Then, let's train a **multi-CNN** (i.e.one CNN is trained per slice location).
+# 168 models will be trained, so this command may take a while. If you don't want
+# to run it, the results can be downloaded a few lines further on.
# %% # 2D-slice multi-CNN training -!clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_multi --n_splits 4 --architecture resnet18 --multi_network +!clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_multi --n_splits 4 --split 0 --architecture resnet18 --batch_size 2 --epochs 1 --multi_network # %% [markdown] # The `clinicadl train` command outputs a MAPS structure in which there are only @@ -363,12 +373,12 @@ #``` # You can find more information about MAPS structure on our -# [documentation](https://clinicadl.readthedocs.io/en/latest/Introduction/#maps-definition) +# [documentation](https://clinicadl.readthedocs.io/en/latest/Introduction/#maps-definition). # %% [markdown] # # Inference using pretrained models # -# (If you failed to train the model please uncomment the next cell) +# If you failed to train the model please uncomment the next cells: # %% !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/maps_classification_2D_slice_multi.tar.gz -o maps_classification_2D_slice_multi.tar.gz !tar xf maps_classification_2D_slice_multi.tar.gz @@ -377,23 +387,12 @@ !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/maps_classification_2D_slice_resnet.tar.gz -o maps_classification_2D_slice_resnet.tar.gz !tar xf maps_classification_2D_slice_resnet.tar.gz -# %% [markdown] -# If you failed to train the model, you also need to download the TSV files with -# the list of participants for each split used for the training because `clinicadl -# tsvtools split` and `clinicadl tsvtools kfold` commands randomly split data so -# you can have data leakage error (see previous [notebook](notebooks/labels_extraction.ipynb) -# for more information about data leakage). 
- -# %% -!curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/split.tar.gz -o training_split.tar.gz -!tar xf training_split.tar.gz - # %% [markdown] # The `predict` functionality performs individual prediction and metrics # computation on a set of data using models trained with `clinicadl train` or # `clinicadl random-search` tasks. # It can also use any pretrained models if they are structured like a -# [MAPS](https://clinicadl.readthedocs.io/en/latest/Introduction/#maps-definition) +# [MAPS](https://clinicadl.readthedocs.io/en/latest/Introduction/#maps-definition). # %% [markdown] # ### Running the task @@ -403,33 +402,32 @@ # clinicadl predict [OPTIONS] INPUT_MAPS_DIRECTORY DATA_GROUP #``` # where: -# - INPUT_MAPS_DIRECTORY (Path) is the path to the MAPS of the pretrained model. -# - DATA_GROUP (str) is the name of the data group used for the prediction. +# - `INPUT_MAPS_DIRECTORY` (Path) is the path to the MAPS of the pretrained model. +# - `DATA_GROUP` (str) is the name of the data group used for the prediction. # ```{warning} # For ClinicaDL, a data group is linked to a list of participants / sessions and # a CAPS directory. When performing a prediction, interpretation or tensor -# serialization the user must give a data group. If this data group does not -# exist, the user MUST give a caps_directory and a participants_tsv. If this -# data group already exists, the user MUST not give any caps_directory or -# participants_tsv, or set overwrite to True. +# serialization, the user must give a data group. If this data group does not +# exist (in the MAPS), the user MUST give a `caps_directory` and a `participants_tsv`. If this +# data group already exists, the user MUST not give any `caps_directory` or +# `participants_tsv`, or set overwrite to True. # ``` # If you want to add optional argument you can check the # [documentation](https://clinicadl.readthedocs.io/en/latest/Predict/). 
# %% -# !clinicadl predict -h -!clinicadl predict data_oasis/maps_classification_2D_slice_resnet18 'test-Oasis2' --participants_tsv ./data_oasis/split/test_baseline.tsv --caps_directory data_oasis/CAPS_example +!clinicadl predict data_oasis/maps_classification_2D_slice_resnet18 'test-Oasis' --participants_tsv ./data_oasis/split/test_baseline.tsv --caps_directory data_oasis/CAPS_example # %% !clinicadl predict data_oasis/maps_classification_2D_slice_multi 'test-Oasis' --participants_tsv ./data_oasis/split/test_baseline.tsv --caps_directory data_oasis/CAPS_example # %% [markdown] -# Results are stored in the MAPS of path `model_path`, according to the +# Results are stored in the MAPS, according to the # following file system: # ```text -# model_path> +# # ├── split-0 # ├── ... # └── split- @@ -447,6 +445,6 @@ # running the next cell: # %% import pandas as pd -metrics = pd.read_csv("data_oasis/maps_classification_2D_slice_resnet18/split-0/best-loss/test-Oasis/test-OASIS_slice_level_metrics.tsv", sep="\t") +metrics = pd.read_csv("data_oasis/maps_classification_2D_slice_resnet18/split-0/best-loss/test-Oasis/test-Oasis_slice_level_metrics.tsv", sep="\t") metrics.head() # %% From 0bf0496ca6bfa01293e96e358ed37ebfc772bf6a Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Tue, 26 Mar 2024 18:13:28 +0100 Subject: [PATCH 22/24] minor typo --- src/training_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/training_classification.py b/src/training_classification.py index 56e99ce..bc313ea 100644 --- a/src/training_classification.py +++ b/src/training_classification.py @@ -75,7 +75,7 @@ # want to get a tensor from a custom filename. # # When using patch or slice extraction, default values were set according to -# [Wen et al., 2020](https://doi.org/10.1016/j.media.2020.101694) +# [Wen et al., 2020](https://doi.org/10.1016/j.media.2020.101694). 
# %% [markdown] # Output files are stored into a new folder (inside the CAPS) and follows a From 31a8a3301d80372ad9f6edd8bb6feef22df352d5 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Wed, 27 Mar 2024 09:35:31 +0100 Subject: [PATCH 23/24] typo Co-authored-by: Gensollen --- src/training_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/training_classification.py b/src/training_classification.py index bc313ea..152d166 100644 --- a/src/training_classification.py +++ b/src/training_classification.py @@ -195,7 +195,7 @@ # # The purpose of this notebook is not to fully train a network, but rather to understand # how ClinicaDL works. Therefore, we will keep working with a subset of OASIS-1. This new -# subset contains 10 T1w images, pre-processed with the pipeline `t1-linear`of Clinia. The +# subset contains 10 T1w images, pre-processed with the pipeline `t1-linear`of Clinica. The # `prepare-data` pipeline has already been performed on the dataset. # # You can remove your old `data_oasis` folder and download the new one: From 6961ade0d12ff3b7173e7ce32301f5a5cb9d05b4 Mon Sep 17 00:00:00 2001 From: thibaultdvx <154365476+thibaultdvx@users.noreply.github.com> Date: Wed, 27 Mar 2024 10:05:03 +0100 Subject: [PATCH 24/24] add some info on multi-CNN --- src/training_classification.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/training_classification.py b/src/training_classification.py index bc313ea..1f689e7 100644 --- a/src/training_classification.py +++ b/src/training_classification.py @@ -318,9 +318,10 @@ # 2D-slice single-CNN training !clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_resnet18 --n_splits 4 --architecture resnet18 --batch_size 8 --epochs 5 # %% [markdown] -# Then, let's train a **multi-CNN** (i.e.one CNN is trained per slice location). 
-# 168 models will be trained, so this command may take a while. If you don't want -# to run it, the results can be downloaded a few lines further on. +# Then, let's train a **multi-CNN** (i.e. one CNN is trained per slice location). +# We will train the models only for the first split, but still there are 168 models, +# so this command may take a while. If you don't want to run it, the results can +# be downloaded a few lines further on. # %% # 2D-slice multi-CNN training !clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_multi --n_splits 4 --split 0 --architecture resnet18 --batch_size 2 --epochs 1 --multi_network @@ -378,11 +379,13 @@ # %% [markdown] # # Inference using pretrained models # -# If you failed to train the model please uncomment the next cells: +# If you failed to train the model please uncomment the next cell: # %% !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/maps_classification_2D_slice_multi.tar.gz -o maps_classification_2D_slice_multi.tar.gz !tar xf maps_classification_2D_slice_multi.tar.gz - +# %% [markdown] +# For the multi-CNN, to reduce download time, you can only access +# the results of the models trained on the first 5 slices: # %% !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/maps_classification_2D_slice_resnet.tar.gz -o maps_classification_2D_slice_resnet.tar.gz !tar xf maps_classification_2D_slice_resnet.tar.gz