Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Implement dataset description for CAPS datasets #1158

Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
09bbd8f
add caps module with basic logic
NicolasGensollen Apr 25, 2024
68b2fec
try linking to the engine
NicolasGensollen Apr 25, 2024
cea5f68
add unit tests for caps module
NicolasGensollen Apr 25, 2024
de102d3
add unit tests for anat pipeline (might change to more general later...)
NicolasGensollen Apr 25, 2024
501151a
fix broken unit tests
NicolasGensollen May 27, 2024
881ec84
trigger CI
NicolasGensollen Jul 11, 2024
bc625a7
post rebase fixes
NicolasGensollen Aug 2, 2024
1dd2318
add suggestion for basic dataset_description.json in error
NicolasGensollen Aug 2, 2024
661a22c
add some doc
NicolasGensollen Aug 2, 2024
4bf9cbf
fix permission errors for non regression tests
NicolasGensollen Aug 2, 2024
59af136
update documentation
NicolasGensollen Aug 5, 2024
4c3a18e
rework CAPS dataset_description.json
NicolasGensollen Aug 6, 2024
394e81c
write additional processing
NicolasGensollen Aug 6, 2024
4e2f1ca
fix input dir
NicolasGensollen Aug 6, 2024
2d489ef
fix permission errors
NicolasGensollen Aug 6, 2024
a81b4bb
use log_and_warn function
NicolasGensollen Aug 6, 2024
ad77d3b
permission issues on CI machines
NicolasGensollen Aug 6, 2024
8d76987
improvements
NicolasGensollen Aug 7, 2024
77fea0f
update documentation
NicolasGensollen Aug 7, 2024
4e9492f
provide more flexibility for comparing different versions of the specs
NicolasGensollen Aug 7, 2024
5a974bf
remove processing_path attribute
NicolasGensollen Aug 12, 2024
9ef04e7
allow multiple processing with same name if input paths are different
NicolasGensollen Aug 12, 2024
0eef866
allow users to specify the name of the CAPS dataset for pipelines tha…
NicolasGensollen Aug 12, 2024
29071af
update documentation
NicolasGensollen Aug 13, 2024
1acc1f3
small modification to the docs
NicolasGensollen Aug 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 49 additions & 3 deletions clinica/iotools/bids_dataset_description.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
from enum import Enum
import json
from pathlib import Path
from typing import IO

from attrs import define, fields
from cattr.gen import make_dict_unstructure_fn, override
from cattr.preconf.json import make_converter
from packaging.version import InvalidVersion, Version

from clinica.utils.bids import BIDS_VERSION
from clinica.utils.exceptions import ClinicaBIDSError
from clinica.utils.inputs import DatasetType
from clinica.utils.stream import log_and_raise

__all__ = [
"BIDSDatasetDescription",
"get_bids_version",
]


@define
Expand All @@ -17,7 +26,7 @@ class BIDSDatasetDescription:
"""

name: str
bids_version: str = BIDS_VERSION
bids_version: Version = BIDS_VERSION
dataset_type: DatasetType = DatasetType.RAW

def write(self, to: IO[str]):
Expand All @@ -39,7 +48,7 @@ def _rename(name: str) -> str:

# Register a JSON converter for the BIDS dataset description model.
converter = make_converter()

converter.register_unstructure_hook(Version, lambda dt: str(dt))
converter.register_unstructure_hook(
BIDSDatasetDescription,
make_dict_unstructure_fn(
Expand All @@ -51,3 +60,40 @@ def _rename(name: str) -> str:
},
),
)


def get_bids_version(dataset_folder: Path) -> Version:
    """Return the BIDS version number of a BIDS or CAPS dataset.

    The version is read from the 'BIDSVersion' key of the
    'dataset_description.json' file located directly inside
    ``dataset_folder``.

    Parameters
    ----------
    dataset_folder : Path
        The folder expected to contain 'dataset_description.json'.

    Returns
    -------
    Version :
        The parsed value of the 'BIDSVersion' key.

    Raises
    ------
    ClinicaBIDSError :
        If 'dataset_description.json' is missing, is not valid JSON,
        has no 'BIDSVersion' key, or holds a value that cannot be
        parsed as a version.
    """
    description_file = dataset_folder / "dataset_description.json"
    try:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(description_file, "r", encoding="utf-8") as fp:
            bids_metadata = json.load(fp)
        return Version(bids_metadata["BIDSVersion"])
    except FileNotFoundError:
        log_and_raise(
            (
                f"File {description_file} is missing "
                "while it is mandatory for a BIDS/CAPS dataset."
            ),
            ClinicaBIDSError,
        )
    except json.JSONDecodeError as e:
        log_and_raise(
            f"File {description_file} is not formatted correctly:\n{e}.",
            ClinicaBIDSError,
        )
    except KeyError:
        log_and_raise(
            (
                f"File {description_file} is missing a "
                "'BIDSVersion' key while it is mandatory."
            ),
            ClinicaBIDSError,
        )
    except (InvalidVersion, TypeError) as e:
        # TypeError is raised (instead of InvalidVersion) when the value is
        # not a string, e.g. a JSON number such as 1.7, or when the top-level
        # JSON value is not an object and therefore cannot be indexed by key.
        log_and_raise(
            (
                f"File {description_file} has a "
                f"BIDS version number not properly formatted:\n{e}"
            ),
            ClinicaBIDSError,
        )
33 changes: 32 additions & 1 deletion clinica/pipelines/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,11 @@ def __init__(
from pathlib import Path
from tempfile import mkdtemp

from clinica.utils.caps import (
build_caps_dataset_description,
write_caps_dataset_description,
)
from clinica.utils.exceptions import ClinicaCAPSError
from clinica.utils.inputs import check_bids_folder, check_caps_folder

self._is_built: bool = False
Expand Down Expand Up @@ -461,11 +466,37 @@ def __init__(
f"The {self._name} pipeline does not contain "
"BIDS nor CAPS directory at the initialization."
)
check_caps_folder(self._caps_directory)
try:
check_caps_folder(self._caps_directory)
except ClinicaCAPSError as e:
desc = build_caps_dataset_description(
self._caps_directory,
self._caps_directory,
self._name,
f"subjects/*/*/{self._name.replace('-', '_')}",
)
raise ClinicaCAPSError(
f"{e}\nYou might want to create a 'dataset_description.json' "
f"file with the following content:\n{desc}"
)
self.is_bids_dir = False
else:
check_bids_folder(self._bids_directory)
self.is_bids_dir = True
if self._caps_directory is not None:
if (
not self._caps_directory.exists()
or len([f for f in self._caps_directory.iterdir()]) == 0
):
self._caps_directory.mkdir(parents=True, exist_ok=True)
if self._caps_directory:
write_caps_dataset_description(
self.input_dir,
self._caps_directory,
self._name,
f"subjects/*/*/{self._name.replace('-', '_')}",
)
check_caps_folder(self._caps_directory)
self._compute_subjects_and_sessions()
self._init_nodes()

Expand Down
4 changes: 3 additions & 1 deletion clinica/utils/bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from pathlib import Path
from typing import Dict, Tuple, Union

from packaging.version import Version

__all__ = [
"BIDSLabel",
"BIDSFileName",
Expand All @@ -12,7 +14,7 @@
"Suffix",
]

BIDS_VERSION = "1.7.0"
BIDS_VERSION = Version("1.7.0")


class Extension(str, Enum):
Expand Down
Loading
Loading