Skip to content

Commit

Permalink
[ENH] Check that BIDS folders have a dataset_description.json file (#1127)
Browse files Browse the repository at this point in the history

* Check that BIDS folders have a dataset_description.json file

* make the _check_bids_is_not_empty function a bit more robust
  • Loading branch information
NicolasGensollen authored Apr 9, 2024
1 parent 4b1c2a4 commit 645fd8c
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 33 deletions.
78 changes: 66 additions & 12 deletions clinica/utils/inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,19 @@
import hashlib
import os
from collections import namedtuple
from enum import Enum
from functools import partial
from pathlib import Path
from typing import Callable, Dict, List, Optional, Tuple, Union


class DatasetType(str, Enum):
    """Defines the possible types of datasets in Clinica.

    Because the class also derives from ``str``, every member is itself a
    string and compares equal to its value (``DatasetType.BIDS == "BIDS"``).
    Use ``.value`` when the plain string is needed, e.g. in messages.
    """

    # Member identifying a BIDS dataset.
    BIDS = "BIDS"
    # Member identifying a CAPS dataset.
    CAPS = "CAPS"


# Immutable record with three positional fields: filename, url and checksum.
# NOTE(review): presumably describes a downloadable file and the checksum used
# to verify it -- confirm against the callers, which are not visible here.
RemoteFileStructure = namedtuple("RemoteFileStructure", ["filename", "url", "checksum"])


Expand Down Expand Up @@ -81,7 +90,8 @@ def _list_subjects_sub_folders(


def _validate_folder_existence(
directory: Union[str, os.PathLike], folder_type: str
directory: Union[str, os.PathLike],
folder_type: DatasetType,
) -> Path:
"""Utility function which performs checks common to BIDS and CAPS folder structures.
Expand All @@ -90,7 +100,7 @@ def _validate_folder_existence(
directory : PathLike or str
Directory to check.
folder_type : {"BIDS", "CAPS"}
folder_type : DatasetType
The type of directory.
Returns
Expand All @@ -104,12 +114,14 @@ def _validate_folder_existence(
directory = Path(directory)
except TypeError:
raise TypeError(
f"Argument you provided to check_{folder_type.lower()}_folder() is not a valid folder name."
f"Argument you provided to check_{folder_type.value.lower()}_folder() is not a valid folder name."
)

if not directory.is_dir():
raise (ClinicaBIDSError if folder_type == "BIDS" else ClinicaCAPSError)(
f"The {folder_type} directory you gave is not a folder.\n"
raise (
ClinicaBIDSError if folder_type == DatasetType.BIDS else ClinicaCAPSError
)(
f"The {folder_type.value} directory you gave is not a folder.\n"
"Error explanations:\n"
f"\t- Clinica expected the following path to be a folder: {directory}\n"
"\t- If you gave relative path, did you run Clinica on the good folder?"
Expand All @@ -119,10 +131,10 @@ def _validate_folder_existence(


_validate_bids_folder_existence = partial(
_validate_folder_existence, folder_type="BIDS"
_validate_folder_existence, folder_type=DatasetType.BIDS
)
_validate_caps_folder_existence = partial(
_validate_folder_existence, folder_type="CAPS"
_validate_folder_existence, folder_type=DatasetType.CAPS
)


Expand All @@ -148,23 +160,65 @@ def check_bids_folder(bids_directory: Union[str, os.PathLike]) -> None:
If the provided folder does not contain at least one directory whose
name starts with 'sub-'.
"""
from clinica.utils.exceptions import ClinicaBIDSError

bids_directory = _validate_bids_folder_existence(bids_directory)
_check_dataset_description_exists_in_bids(bids_directory)
_check_bids_is_not_caps(bids_directory)
_check_bids_is_not_empty(bids_directory)
_check_bids_has_at_least_one_subject_folder(bids_directory)


def _check_dataset_description_exists(
    directory: Path, folder_type: DatasetType
) -> None:
    """Check that a dataset folder contains a dataset_description.json file.

    Parameters
    ----------
    directory : Path
        The dataset directory to check.

    folder_type : DatasetType
        The type of dataset. It selects the exception class raised and
        the dataset name used in the error message.

    Raises
    ------
    ClinicaBIDSError
        If `folder_type` is BIDS and the file is missing.

    ClinicaCAPSError
        If `folder_type` is anything else and the file is missing.
    """
    from clinica.utils.exceptions import ClinicaBIDSError, ClinicaCAPSError

    # is_file() rather than exists(): a sub-directory that merely happens to
    # be named 'dataset_description.json' is not a valid description file.
    if (directory / "dataset_description.json").is_file():
        return

    error_class = (
        ClinicaBIDSError if folder_type == DatasetType.BIDS else ClinicaCAPSError
    )
    raise error_class(
        f"The {folder_type.value} directory ({directory}) you provided is missing "
        "a dataset_description.json file."
    )


# Specializations of _check_dataset_description_exists with `folder_type`
# pre-bound, so that callers only have to pass the directory to check.
_check_dataset_description_exists_in_bids = partial(
    _check_dataset_description_exists, folder_type=DatasetType.BIDS
)
# Same check with the dataset type bound to CAPS.
_check_dataset_description_exists_in_caps = partial(
    _check_dataset_description_exists, folder_type=DatasetType.CAPS
)


def _check_bids_is_not_caps(bids_directory: Path):
    """Reject a supposed BIDS directory that looks like a CAPS directory.

    The presence of a 'subjects' sub-folder directly under `bids_directory`
    is taken as the sign of a CAPS layout.

    Raises
    ------
    ClinicaBIDSError
        If `bids_directory` contains a folder named 'subjects'.
    """
    from clinica.utils.exceptions import ClinicaBIDSError

    caps_marker = bids_directory / "subjects"
    if not caps_marker.is_dir():
        return

    raise ClinicaBIDSError(
        f"The BIDS directory ({bids_directory}) you provided seems to "
        "be a CAPS directory due to the presence of a 'subjects' folder."
    )

if len([f for f in bids_directory.iterdir()]) == 0:

def _check_bids_is_not_empty(bids_directory: Path) -> None:
    """Check that a BIDS directory holds more than its description file.

    The 'dataset_description.json' entry is ignored, so a directory that
    contains only this file is still reported as empty.

    Parameters
    ----------
    bids_directory : Path
        The BIDS directory to check.

    Raises
    ------
    ClinicaBIDSError
        If the directory has no entry other than 'dataset_description.json'.
    """
    from clinica.utils.exceptions import ClinicaBIDSError

    # any() stops at the first qualifying entry instead of materializing
    # the full directory listing just to measure its length.
    has_content = any(
        entry.name != "dataset_description.json"
        for entry in bids_directory.iterdir()
    )
    if not has_content:
        raise ClinicaBIDSError(
            f"The BIDS directory you provided is empty. ({bids_directory})."
        )

subj = [f for f in bids_directory.iterdir() if f.name.startswith("sub-")]
if len(subj) == 0:

def _check_bids_has_at_least_one_subject_folder(bids_directory: Path):
from clinica.utils.exceptions import ClinicaBIDSError

if len([f for f in bids_directory.iterdir() if f.name.startswith("sub-")]) == 0:
raise ClinicaBIDSError(
"Your BIDS directory does not contains a single folder whose name "
"starts with 'sub-'. Check that your folder follow BIDS standard."
Expand Down
78 changes: 57 additions & 21 deletions test/unittests/utils/test_utils_inputs.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import os
import re
from pathlib import Path

import pytest

from clinica.utils.exceptions import ClinicaBIDSError, ClinicaCAPSError
from clinica.utils.inputs import DatasetType
from clinica.utils.testing_utils import (
build_bids_directory,
build_caps_directory,
Expand Down Expand Up @@ -260,9 +263,8 @@ def test_determine_caps_or_bids(tmp_path):
assert determine_caps_or_bids(tmp_path)


@pytest.mark.parametrize("folder_type", ["BIDS", "CAPS"])
@pytest.mark.parametrize("folder_type", DatasetType)
def test_validate_folder_existence(folder_type):
from clinica.utils.exceptions import ClinicaBIDSError, ClinicaCAPSError
from clinica.utils.inputs import _validate_folder_existence

with pytest.raises(
Expand All @@ -272,43 +274,82 @@ def test_validate_folder_existence(folder_type):
_validate_folder_existence(1, folder_type) # noqa

with pytest.raises(
ClinicaBIDSError if folder_type == "BIDS" else ClinicaCAPSError,
match=f"The {folder_type} directory you gave is not a folder.",
ClinicaBIDSError if folder_type == DatasetType.BIDS else ClinicaCAPSError,
match=f"The {folder_type.value} directory you gave is not a folder.",
):
_validate_folder_existence(Path("fooooo"), folder_type)


def test_check_bids_folder(tmp_path):
from clinica.utils.exceptions import ClinicaBIDSError
def test_check_bids_folder_missing_dataset_description_error(tmp_path):
    """A folder without a dataset_description.json file is rejected."""
    from clinica.utils.inputs import check_bids_folder

    # The message embeds the path and parentheses, hence the escaping.
    expected_message = re.escape(
        f"The BIDS directory ({tmp_path}) you provided is missing a dataset_description.json file."
    )

    with pytest.raises(ClinicaBIDSError, match=expected_message):
        check_bids_folder(tmp_path)


def test_check_bids_folder_mismatch_with_caps_error(tmp_path):
from clinica.utils.inputs import check_bids_folder

(tmp_path / "dataset_description.json").touch()
(tmp_path / "subjects").mkdir()
(tmp_path / "subjects" / "foo.txt").mkdir()

with pytest.raises(
ClinicaBIDSError,
match="The BIDS directory",
match=re.escape(
f"The BIDS directory ({tmp_path}) you provided seems to be a CAPS "
"directory due to the presence of a 'subjects' folder."
),
):
check_bids_folder(tmp_path)
rmtree(tmp_path / "subjects")
(tmp_path / "data").mkdir()


def test_check_bids_folder_empty_error(tmp_path):
from clinica.utils.inputs import check_bids_folder

bids = tmp_path / "bids"
bids.mkdir()
(bids / "dataset_description.json").touch()

with pytest.raises(
ClinicaBIDSError,
match="The BIDS directory you provided is empty.",
):
check_bids_folder(tmp_path / "data")
(tmp_path / "data" / "foo").mkdir()
check_bids_folder(bids)


def test_check_bids_folder_no_subject_folder_error(tmp_path):
from clinica.utils.inputs import check_bids_folder

bids = tmp_path / "bids"
bids.mkdir()
(bids / "dataset_description.json").touch()
(bids / "foo").mkdir()

with pytest.raises(
ClinicaBIDSError,
match="Your BIDS directory does not contains a single folder whose name",
):
check_bids_folder(tmp_path / "data")
(tmp_path / "data" / "sub-01").mkdir()
assert check_bids_folder(tmp_path / "data") is None
check_bids_folder(bids)


def test_check_bids_folder(tmp_path):
    """A folder with a description file and one subject passes all checks."""
    from clinica.utils.inputs import check_bids_folder

    dataset_root = tmp_path / "bids"
    dataset_root.mkdir()
    description_file = dataset_root / "dataset_description.json"
    description_file.touch()
    subject_folder = dataset_root / "sub-01"
    subject_folder.mkdir()

    result = check_bids_folder(dataset_root)

    assert result is None


def test_check_caps_folder(tmp_path):
"""Test function `check_caps_folder`."""
from clinica.utils.exceptions import ClinicaCAPSError
from clinica.utils.inputs import check_caps_folder

(tmp_path / "subjects").mkdir()
Expand Down Expand Up @@ -470,7 +511,6 @@ def test_format_errors():
@pytest.mark.parametrize("data_type", ["T1w", "flair"])
def test_clinica_file_reader_bids_directory(tmp_path, data_type):
"""Test reading from a BIDS directory with function `clinica_file_reader`."""
from clinica.utils.exceptions import ClinicaBIDSError
from clinica.utils.inputs import clinica_file_reader

config = {
Expand Down Expand Up @@ -553,7 +593,6 @@ def test_clinica_file_reader_bids_directory(tmp_path, data_type):

def test_clinica_file_reader_caps_directory(tmp_path):
"""Test reading from a CAPS directory with function `clinica_file_reader`."""
from clinica.utils.exceptions import ClinicaCAPSError
from clinica.utils.inputs import clinica_file_reader

config = {
Expand Down Expand Up @@ -645,7 +684,6 @@ def test_clinica_file_reader_caps_directory(tmp_path):


def test_clinica_file_reader_dwi_dti_error(tmp_path):
from clinica.utils.exceptions import ClinicaCAPSError
from clinica.utils.input_files import dwi_dti
from clinica.utils.inputs import clinica_file_reader

Expand Down Expand Up @@ -693,7 +731,6 @@ def test_clinica_file_reader_dwi_dti(tmp_path):


def test_clinica_list_of_files_reader(tmp_path):
from clinica.utils.exceptions import ClinicaBIDSError
from clinica.utils.inputs import clinica_list_of_files_reader

config = {
Expand Down Expand Up @@ -756,7 +793,6 @@ def test_clinica_list_of_files_reader(tmp_path):


def test_clinica_group_reader(tmp_path):
from clinica.utils.exceptions import ClinicaCAPSError
from clinica.utils.inputs import clinica_group_reader

config = {
Expand Down

0 comments on commit 645fd8c

Please sign in to comment.