diff --git a/src/training_classification.py b/src/training_classification.py index 1338c50..957d794 100644 --- a/src/training_classification.py +++ b/src/training_classification.py @@ -75,7 +75,7 @@ # want to get a tensor from a custom filename. # # When using patch or slice extraction, default values were set according to -# [Wen et al., 2020](https://doi.org/10.1016/j.media.2020.101694) +# [Wen et al., 2020](https://doi.org/10.1016/j.media.2020.101694). # %% [markdown] # Output files are stored into a new folder (inside the CAPS) and follows a @@ -142,17 +142,11 @@ # %% [markdown] # ## Before starting # If you failed to obtain the preprocessing using the `t1-linear` pipeline, -# please uncomment the next cell. You can extract tensors from this CAPS, but -# for the training part you will need a bigger dataset. +# please uncomment the next cell. # %% # !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/CAPS_example.tar.gz -o oasisCaps.tar.gz # !tar xf oasisCaps.tar.gz -# %% [markdown] -# If you have already downloaded the full dataset and converted it to -# CAPS, you can give the path to the dataset directory by changing -# the CAPS path. If not, just run it as written but the results will -# not be relevant. # %% [markdown] # To perform the feature extraction for our dataset, run the following cell: # %% @@ -198,6 +192,17 @@ # based on the labels and splits obtained in the [previous section](./label_extraction.ipynb). # Of course, you can use another dataset, on which you will also have to perform # labels extraction and data splitting. +# +# The purpose of this notebook is not to fully train a network, but rather to understand +# how ClinicaDL works. Therefore, we will keep working with a subset of OASIS-1. This new +# subset contains 10 T1w images, pre-processed with the pipeline `t1-linear` of Clinica. The +# `prepare-data` pipeline has already been performed on the dataset. 
+# +# You can remove your old `data_oasis` folder and download the new one: + +# %% +# !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/CAPS_example_train.tar.gz -o oasisCaps.tar.gz +# !tar xf oasisCaps.tar.gz # %% [markdown] # ## `train classification` @@ -234,9 +239,9 @@ # %% [markdown] # ### Prerequisites # -# You need to execute `clinicadl tsvtools get-labels` and `clinicadl tsvtools -# {split|kfold}` commands prior to running this task to have the correct TSV file -# organization. Moreover, there should be a CAPS, obtained running the +# If you use your own dataset, you need to execute `clinicadl tsvtools get-labels` +# and `clinicadl tsvtools {split|kfold}` commands prior to running this task to have +# the correct TSV file organization. Moreover, there should be a CAPS, obtained running the # preprocessing pipeline wanted. # %% [markdown] # ### Running the task @@ -252,7 +257,7 @@ # [CAPS](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/) # hierarchy. In case of multi-cohort training, must be a path to a TSV file. # - `PREPROCESSING_JSON` (str) is the name of the preprocessing json file stored -# in the `CAPS_DIRECTORY` that corresponds to the `clinicadl extract` output. +# in the `CAPS_DIRECTORY` that corresponds to the `clinicadl prepare-data` output. # This will be used to load the correct tensor inputs with the wanted # preprocessing. # - `TSV_DIRECTORY` (Path) is the input folder of a TSV file tree generated by @@ -299,21 +304,27 @@ # The default label for the classification task is `diagnosis` but as long as it # is a categorical variable, it can be of any type. # %% [markdown] -# The next cell train a `resnet18` to classify 2D slices of t1-linear MRI by +# The next cells train `resnet18` networks to classify 2D slices of t1-linear MRI by # diagnosis (AD or CN). -# Please note that the purpose of this notebook is not to fully train a network -# because we don't have enough data. 
The objective is to understand how ClinicaDL -# works and make inferences using pretrained models in the next section. +# Please note once again that we don't expect any interesting results with a +# network trained on only 10 MRI images. That's why we will train the networks for +# only a few epochs. +# +# Let's first train a **single-CNN** on all slice locations (actually 4 networks are +# trained, one for each split): # %% # 2D-slice single-CNN training -#!clinicadl train classification -h -!clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_resnet18 --n_splits 4 --architecture resnet18 - +!clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_resnet18 --n_splits 4 --architecture resnet18 --batch_size 8 --epochs 5 +# %% [markdown] +# Then, let's train a **multi-CNN** (i.e. one CNN is trained per slice location). +# We will train the models only for the first split, but still there are 168 models, +# so this command may take a while. If you don't want to run it, the results can +# be downloaded a few lines further on. 
# %% # 2D-slice multi-CNN training -!clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_multi --n_splits 4 --architecture resnet18 --multi_network +!clinicadl train classification data_oasis/CAPS_example slice_classification_t1 data_oasis/split/4_fold/ data_oasis/maps_classification_2D_slice_multi --n_splits 4 --split 0 --architecture resnet18 --batch_size 2 --epochs 1 --multi_network # %% [markdown] # The `clinicadl train` command outputs a MAPS structure in which there are only @@ -363,37 +374,28 @@ #``` # You can find more information about MAPS structure on our -# [documentation](https://clinicadl.readthedocs.io/en/latest/Introduction/#maps-definition) +# [documentation](https://clinicadl.readthedocs.io/en/latest/Introduction/#maps-definition). # %% [markdown] # # Inference using pretrained models # -# (If you failed to train the model please uncomment the next cell) +# If you failed to train the model, please run the next cell: # %% !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/maps_classification_2D_slice_multi.tar.gz -o maps_classification_2D_slice_multi.tar.gz !tar xf maps_classification_2D_slice_multi.tar.gz - +# %% [markdown] +# For the multi-CNN, to reduce download time, you can only access +# the results of the models trained on the first 5 slices: # %% !curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/maps_classification_2D_slice_resnet.tar.gz -o maps_classification_2D_slice_resnet.tar.gz !tar xf maps_classification_2D_slice_resnet.tar.gz -# %% [markdown] -# If you failed to train the model, you also need to download the TSV files with -# the list of participants for each split used for the training because `clinicadl -# tsvtools split` and `clinicadl tsvtools kfold` commands randomly split data so -# you can have data leakage error (see previous 
[notebook](notebooks/labels_extraction.ipynb) -# for more information about data leakage). - -# %% -!curl -k https://aramislab.paris.inria.fr/clinicadl/files/handbook_2023/data_oasis/split.tar.gz -o training_split.tar.gz -!tar xf training_split.tar.gz - # %% [markdown] # The `predict` functionality performs individual prediction and metrics # computation on a set of data using models trained with `clinicadl train` or # `clinicadl random-search` tasks. # It can also use any pretrained models if they are structured like a -# [MAPS](https://clinicadl.readthedocs.io/en/latest/Introduction/#maps-definition) +# [MAPS](https://clinicadl.readthedocs.io/en/latest/Introduction/#maps-definition). # %% [markdown] # ### Running the task @@ -403,33 +405,32 @@ # clinicadl predict [OPTIONS] INPUT_MAPS_DIRECTORY DATA_GROUP #``` # where: -# - INPUT_MAPS_DIRECTORY (Path) is the path to the MAPS of the pretrained model. -# - DATA_GROUP (str) is the name of the data group used for the prediction. +# - `INPUT_MAPS_DIRECTORY` (Path) is the path to the MAPS of the pretrained model. +# - `DATA_GROUP` (str) is the name of the data group used for the prediction. # ```{warning} # For ClinicaDL, a data group is linked to a list of participants / sessions and # a CAPS directory. When performing a prediction, interpretation or tensor -# serialization the user must give a data group. If this data group does not -# exist, the user MUST give a caps_directory and a participants_tsv. If this -# data group already exists, the user MUST not give any caps_directory or -# participants_tsv, or set overwrite to True. +# serialization, the user must give a data group. If this data group does not +# exist (in the MAPS), the user MUST give a `caps_directory` and a `participants_tsv`. If this +# data group already exists, the user MUST not give any `caps_directory` or +# `participants_tsv`, or set overwrite to True. 
# ``` # If you want to add optional argument you can check the # [documentation](https://clinicadl.readthedocs.io/en/latest/Predict/). # %% -# !clinicadl predict -h -!clinicadl predict data_oasis/maps_classification_2D_slice_resnet18 'test-Oasis2' --participants_tsv ./data_oasis/split/test_baseline.tsv --caps_directory data_oasis/CAPS_example +!clinicadl predict data_oasis/maps_classification_2D_slice_resnet18 'test-Oasis' --participants_tsv ./data_oasis/split/test_baseline.tsv --caps_directory data_oasis/CAPS_example # %% !clinicadl predict data_oasis/maps_classification_2D_slice_multi 'test-Oasis' --participants_tsv ./data_oasis/split/test_baseline.tsv --caps_directory data_oasis/CAPS_example # %% [markdown] -# Results are stored in the MAPS of path `model_path`, according to the +# Results are stored in the MAPS, according to the # following file system: # ```text -# model_path> +# # ├── split-0 # ├── ... # └── split- @@ -447,6 +448,6 @@ # running the next cell: # %% import pandas as pd -metrics = pd.read_csv("data_oasis/maps_classification_2D_slice_resnet18/split-0/best-loss/test-Oasis/test-OASIS_slice_level_metrics.tsv", sep="\t") +metrics = pd.read_csv("data_oasis/maps_classification_2D_slice_resnet18/split-0/best-loss/test-Oasis/test-Oasis_slice_level_metrics.tsv", sep="\t") metrics.head() # %%