From 645fd8c631b8847c68d2b611c6bc4df26fcefabe Mon Sep 17 00:00:00 2001 From: Gensollen Date: Tue, 9 Apr 2024 13:12:41 +0200 Subject: [PATCH] [ENH] Check that BIDS folders have a `dataset_description.json` file (#1127) * Check that BIDS folders have a dataset_description.json file * make the _check_bids_is_not_empty function a bit more robust --- clinica/utils/inputs.py | 78 +++++++++++++++++++---- test/unittests/utils/test_utils_inputs.py | 78 +++++++++++++++++------ 2 files changed, 123 insertions(+), 33 deletions(-) diff --git a/clinica/utils/inputs.py b/clinica/utils/inputs.py index 8325e4037..11f55889c 100644 --- a/clinica/utils/inputs.py +++ b/clinica/utils/inputs.py @@ -3,10 +3,19 @@ import hashlib import os from collections import namedtuple +from enum import Enum from functools import partial from pathlib import Path from typing import Callable, Dict, List, Optional, Tuple, Union + +class DatasetType(str, Enum): + """Defines the possible types of datasets in Clinica.""" + + BIDS = "BIDS" + CAPS = "CAPS" + + RemoteFileStructure = namedtuple("RemoteFileStructure", ["filename", "url", "checksum"]) @@ -81,7 +90,8 @@ def _list_subjects_sub_folders( def _validate_folder_existence( - directory: Union[str, os.PathLike], folder_type: str + directory: Union[str, os.PathLike], + folder_type: DatasetType, ) -> Path: """Utility function which performs checks common to BIDS and CAPS folder structures. @@ -90,7 +100,7 @@ def _validate_folder_existence( directory : PathLike or str Directory to check. - folder_type : {"BIDS", "CAPS"} + folder_type : DatasetType The type of directory. Returns @@ -104,12 +114,14 @@ def _validate_folder_existence( directory = Path(directory) except TypeError: raise TypeError( - f"Argument you provided to check_{folder_type.lower()}_folder() is not a valid folder name." + f"Argument you provided to check_{folder_type.value.lower()}_folder() is not a valid folder name." ) if not directory.is_dir(): - raise (ClinicaBIDSError if folder_type == "BIDS" else ClinicaCAPSError)( - f"The {folder_type} directory you gave is not a folder.\n" + raise ( + ClinicaBIDSError if folder_type == DatasetType.BIDS else ClinicaCAPSError + )( + f"The {folder_type.value} directory you gave is not a folder.\n" "Error explanations:\n" f"\t- Clinica expected the following path to be a folder: {directory}\n" "\t- If you gave relative path, did you run Clinica on the good folder?" @@ -119,10 +131,10 @@ def _validate_folder_existence( _validate_bids_folder_existence = partial( - _validate_folder_existence, folder_type="BIDS" + _validate_folder_existence, folder_type=DatasetType.BIDS ) _validate_caps_folder_existence = partial( - _validate_folder_existence, folder_type="CAPS" + _validate_folder_existence, folder_type=DatasetType.CAPS ) @@ -148,9 +160,35 @@ def check_bids_folder(bids_directory: Union[str, os.PathLike]) -> None: If the provided folder does not contain at least one directory whose name starts with 'sub-'. """ - from clinica.utils.exceptions import ClinicaBIDSError - bids_directory = _validate_bids_folder_existence(bids_directory) + _check_dataset_description_exists_in_bids(bids_directory) + _check_bids_is_not_caps(bids_directory) + _check_bids_is_not_empty(bids_directory) + _check_bids_has_at_least_one_subject_folder(bids_directory) + + +def _check_dataset_description_exists(directory: Path, folder_type: DatasetType): + from clinica.utils.exceptions import ClinicaBIDSError, ClinicaCAPSError + + if not (directory / "dataset_description.json").exists(): + raise ( + ClinicaBIDSError if folder_type == DatasetType.BIDS else ClinicaCAPSError + )( + f"The {folder_type.value} directory ({directory}) you provided is missing " + "a dataset_description.json file." + ) + + +_check_dataset_description_exists_in_bids = partial( + _check_dataset_description_exists, folder_type=DatasetType.BIDS +) +_check_dataset_description_exists_in_caps = partial( + _check_dataset_description_exists, folder_type=DatasetType.CAPS +) + + +def _check_bids_is_not_caps(bids_directory: Path): + from clinica.utils.exceptions import ClinicaBIDSError if (bids_directory / "subjects").is_dir(): raise ClinicaBIDSError( @@ -158,13 +196,29 @@ def check_bids_folder(bids_directory: Union[str, os.PathLike]) -> None: "be a CAPS directory due to the presence of a 'subjects' folder." ) - if len([f for f in bids_directory.iterdir()]) == 0: + +def _check_bids_is_not_empty(bids_directory: Path): + from clinica.utils.exceptions import ClinicaBIDSError + + if ( + len( + [ + f + for f in bids_directory.iterdir() + if f.name != "dataset_description.json" + ] + ) + == 0 + ): raise ClinicaBIDSError( f"The BIDS directory you provided is empty. ({bids_directory})." ) - subj = [f for f in bids_directory.iterdir() if f.name.startswith("sub-")] - if len(subj) == 0: + +def _check_bids_has_at_least_one_subject_folder(bids_directory: Path): + from clinica.utils.exceptions import ClinicaBIDSError + + if len([f for f in bids_directory.iterdir() if f.name.startswith("sub-")]) == 0: raise ClinicaBIDSError( "Your BIDS directory does not contains a single folder whose name " "starts with 'sub-'. Check that your folder follow BIDS standard." diff --git a/test/unittests/utils/test_utils_inputs.py b/test/unittests/utils/test_utils_inputs.py index ee02d4621..e9735ffd3 100644 --- a/test/unittests/utils/test_utils_inputs.py +++ b/test/unittests/utils/test_utils_inputs.py @@ -1,8 +1,11 @@ import os +import re from pathlib import Path import pytest +from clinica.utils.exceptions import ClinicaBIDSError, ClinicaCAPSError +from clinica.utils.inputs import DatasetType from clinica.utils.testing_utils import ( build_bids_directory, build_caps_directory, @@ -260,9 +263,8 @@ def test_determine_caps_or_bids(tmp_path): assert determine_caps_or_bids(tmp_path) -@pytest.mark.parametrize("folder_type", ["BIDS", "CAPS"]) +@pytest.mark.parametrize("folder_type", DatasetType) def test_validate_folder_existence(folder_type): - from clinica.utils.exceptions import ClinicaBIDSError, ClinicaCAPSError from clinica.utils.inputs import _validate_folder_existence with pytest.raises( @@ -272,43 +274,82 @@ def test_validate_folder_existence(folder_type): _validate_folder_existence(1, folder_type) # noqa with pytest.raises( - ClinicaBIDSError if folder_type == "BIDS" else ClinicaCAPSError, - match=f"The {folder_type} directory you gave is not a folder.", + ClinicaBIDSError if folder_type == DatasetType.BIDS else ClinicaCAPSError, + match=f"The {folder_type.value} directory you gave is not a folder.", ): _validate_folder_existence(Path("fooooo"), folder_type) -def test_check_bids_folder(tmp_path): - from clinica.utils.exceptions import ClinicaBIDSError +def test_check_bids_folder_missing_dataset_description_error(tmp_path): from clinica.utils.inputs import check_bids_folder + with pytest.raises( + ClinicaBIDSError, + match=re.escape( + f"The BIDS directory ({tmp_path}) you provided is missing a dataset_description.json file." + ), + ): + check_bids_folder(tmp_path) + + +def test_check_bids_folder_mismatch_with_caps_error(tmp_path): + from clinica.utils.inputs import check_bids_folder + + (tmp_path / "dataset_description.json").touch() (tmp_path / "subjects").mkdir() - (tmp_path / "subjects" / "foo.txt").mkdir() + with pytest.raises( ClinicaBIDSError, - match="The BIDS directory", + match=re.escape( + f"The BIDS directory ({tmp_path}) you provided seems to be a CAPS " + "directory due to the presence of a 'subjects' folder." + ), ): check_bids_folder(tmp_path) - rmtree(tmp_path / "subjects") - (tmp_path / "data").mkdir() + + +def test_check_bids_folder_empty_error(tmp_path): + from clinica.utils.inputs import check_bids_folder + + bids = tmp_path / "bids" + bids.mkdir() + (bids / "dataset_description.json").touch() + with pytest.raises( ClinicaBIDSError, match="The BIDS directory you provided is empty.", ): - check_bids_folder(tmp_path / "data") - (tmp_path / "data" / "foo").mkdir() + check_bids_folder(bids) + + +def test_check_bids_folder_no_subject_folder_error(tmp_path): + from clinica.utils.inputs import check_bids_folder + + bids = tmp_path / "bids" + bids.mkdir() + (bids / "dataset_description.json").touch() + (bids / "foo").mkdir() + with pytest.raises( ClinicaBIDSError, match="Your BIDS directory does not contains a single folder whose name", ): - check_bids_folder(tmp_path / "data") - (tmp_path / "data" / "sub-01").mkdir() - assert check_bids_folder(tmp_path / "data") is None + check_bids_folder(bids) + + +def test_check_bids_folder(tmp_path): + from clinica.utils.inputs import check_bids_folder + + bids = tmp_path / "bids" + bids.mkdir() + (bids / "dataset_description.json").touch() + (bids / "sub-01").mkdir() + + assert check_bids_folder(bids) is None def test_check_caps_folder(tmp_path): """Test function `check_caps_folder`.""" - from clinica.utils.exceptions import ClinicaCAPSError from clinica.utils.inputs import check_caps_folder (tmp_path / "subjects").mkdir() @@ -470,7 +511,6 @@ def test_format_errors(): @pytest.mark.parametrize("data_type", ["T1w", "flair"]) def test_clinica_file_reader_bids_directory(tmp_path, data_type): """Test reading from a BIDS directory with function `clinica_file_reader`.""" - from clinica.utils.exceptions import ClinicaBIDSError from clinica.utils.inputs import clinica_file_reader config = { @@ -553,7 +593,6 @@ def test_clinica_file_reader_bids_directory(tmp_path, data_type): def test_clinica_file_reader_caps_directory(tmp_path): """Test reading from a CAPS directory with function `clinica_file_reader`.""" - from clinica.utils.exceptions import ClinicaCAPSError from clinica.utils.inputs import clinica_file_reader config = { @@ -645,7 +684,6 @@ def test_clinica_file_reader_caps_directory(tmp_path): def test_clinica_file_reader_dwi_dti_error(tmp_path): - from clinica.utils.exceptions import ClinicaCAPSError from clinica.utils.input_files import dwi_dti from clinica.utils.inputs import clinica_file_reader @@ -693,7 +731,6 @@ def test_clinica_file_reader_dwi_dti(tmp_path): def test_clinica_list_of_files_reader(tmp_path): - from clinica.utils.exceptions import ClinicaBIDSError from clinica.utils.inputs import clinica_list_of_files_reader config = { @@ -756,7 +793,6 @@ def test_clinica_list_of_files_reader(tmp_path): def test_clinica_group_reader(tmp_path): - from clinica.utils.exceptions import ClinicaCAPSError from clinica.utils.inputs import clinica_group_reader config = {