Skip to content

Commit

Permalink
[ENH] Implement dataset description for CAPS datasets (#1158)
Browse files Browse the repository at this point in the history
* add caps module with basic logic

* try linking to the engine

* add unit tests for caps module

* add unit tests for anat pipeline (might change to more general later...)

* fix broken unit tests

* trigger CI

* post rebase fixes

* add suggestion for basic dataset_description.json in error

* add some doc

* fix permission errors for non regression tests

* update documentation

* rework CAPS dataset_description.json

* write additional processing

* fix input dir

* fix permission errors

* use log_and_warn function

* permission issues on CI machines

* improvements

* update documentation

* provide more flexibility for comparing different versions of the specs

* remove processing_path attribute

* allow multiple processing with same name if input paths are different

* allow users to specify the name of the CAPS dataset for pipelines that create a CAPS

* update documentation

* small modification to the docs
  • Loading branch information
NicolasGensollen authored Aug 19, 2024
1 parent a4fc1a3 commit 8bd07fc
Show file tree
Hide file tree
Showing 30 changed files with 1,676 additions and 341 deletions.
52 changes: 49 additions & 3 deletions clinica/iotools/bids_dataset_description.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
from enum import Enum
import json
from pathlib import Path
from typing import IO

from attrs import define, fields
from cattr.gen import make_dict_unstructure_fn, override
from cattr.preconf.json import make_converter
from packaging.version import InvalidVersion, Version

from clinica.utils.bids import BIDS_VERSION
from clinica.utils.exceptions import ClinicaBIDSError
from clinica.utils.inputs import DatasetType
from clinica.utils.stream import log_and_raise

__all__ = [
"BIDSDatasetDescription",
"get_bids_version",
]


@define
Expand All @@ -17,7 +26,7 @@ class BIDSDatasetDescription:
"""

name: str
bids_version: str = BIDS_VERSION
bids_version: Version = BIDS_VERSION
dataset_type: DatasetType = DatasetType.RAW

def write(self, to: IO[str]):
Expand All @@ -39,7 +48,7 @@ def _rename(name: str) -> str:

# Register a JSON converter for the BIDS dataset description model.
converter = make_converter()

converter.register_unstructure_hook(Version, lambda dt: str(dt))
converter.register_unstructure_hook(
BIDSDatasetDescription,
make_dict_unstructure_fn(
Expand All @@ -51,3 +60,40 @@ def _rename(name: str) -> str:
},
),
)


def get_bids_version(dataset_folder: Path) -> Version:
    """Return the BIDS version number of a BIDS or CAPS dataset.

    The version is read from the 'BIDSVersion' key of the
    'dataset_description.json' file located at the root of the dataset.

    Parameters
    ----------
    dataset_folder : Path
        The path to the root folder of the BIDS or CAPS dataset.

    Returns
    -------
    Version :
        The parsed BIDS version of the dataset.

    Raises
    ------
    ClinicaBIDSError
        If the 'dataset_description.json' file is missing, is not valid JSON,
        has no 'BIDSVersion' key, or if the value of this key is not a
        properly formatted version string.
    """
    # Built once, outside of the try block, so that every error message
    # refers to the exact same path and so that a wrong 'dataset_folder'
    # type is not mis-reported as a malformed description file.
    description_file = dataset_folder / "dataset_description.json"
    try:
        # JSON files are UTF-8 encoded; do not rely on the platform default.
        with open(description_file, "r", encoding="utf-8") as fp:
            bids_metadata = json.load(fp)
        return Version(bids_metadata["BIDSVersion"])
    except InvalidVersion as e:
        log_and_raise(
            (
                f"File {description_file} has a "
                f"BIDS version number not properly formatted:\n{e}"
            ),
            ClinicaBIDSError,
        )
    except FileNotFoundError:
        log_and_raise(
            (
                f"File {description_file} is missing "
                "while it is mandatory for a BIDS/CAPS dataset."
            ),
            ClinicaBIDSError,
        )
    except KeyError:
        log_and_raise(
            (
                f"File {description_file} is missing a "
                "'BIDSVersion' key while it is mandatory."
            ),
            ClinicaBIDSError,
        )
    except TypeError as e:
        # Raised when the JSON root is not an object (e.g. a list), or when
        # 'BIDSVersion' is not a string (e.g. written as the number 1.7).
        # The value is deliberately not coerced with str(): a JSON number
        # such as 1.10 would silently become version "1.1".
        log_and_raise(
            (
                f"File {description_file} should contain a JSON object "
                f"with a 'BIDSVersion' key holding a string:\n{e}"
            ),
            ClinicaBIDSError,
        )
    except json.JSONDecodeError as e:
        log_and_raise(
            f"File {description_file} is not formatted correctly:\n{e}.",
            ClinicaBIDSError,
        )
3 changes: 3 additions & 0 deletions clinica/pipelines/anatomical/freesurfer/t1/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
@option.global_option_group
@option.n_procs
@click.pass_context
@cli_param.option.caps_name
def cli(
ctx: click.Context,
bids_directory: str,
Expand All @@ -45,6 +46,7 @@ def cli(
overwrite_outputs: bool = False,
yes: bool = False,
atlas_path: Optional[str] = None,
caps_name: Optional[str] = None,
) -> None:
"""Cross-sectional pre-processing of T1w images with FreeSurfer.
Expand All @@ -68,6 +70,7 @@ def cli(
},
name=pipeline_name,
overwrite_caps=overwrite_outputs,
caps_name=caps_name,
)
exec_pipeline = (
pipeline.run(plugin="MultiProc", plugin_args={"n_procs": n_procs})
Expand Down
12 changes: 12 additions & 0 deletions clinica/pipelines/cli_param/option.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@
),
)

# Command-line option (-cn / --caps-name) giving the name to record in the
# metadata of the CAPS dataset created by a pipeline, as opposed to the name
# of the CAPS folder on disk. Per the help text below, an existing CAPS
# dataset that already has a name keeps it.
# NOTE(review): `option` is presumably a click option factory - confirm
# against this module's imports.
caps_name = option(
"-cn",
"--caps-name",
type=str,
help=(
"The name of the CAPS dataset that will be created by the pipeline. "
"This is not the name of the folder itself, but the name in the metadata, "
"which can be different if desired. If the CAPS folder already exists and "
"already has a name, this will have no effect and the existing name will be kept."
),
)

dartel_tissues = option(
"-dt",
"--dartel_tissues",
Expand Down
3 changes: 3 additions & 0 deletions clinica/pipelines/dwi/preprocessing/fmap/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
@cli_param.option_group.advanced_pipeline_options
@cli_param.option.use_cuda
@cli_param.option.initrand
@cli_param.option.caps_name
def cli(
bids_directory: str,
caps_directory: str,
Expand All @@ -32,6 +33,7 @@ def cli(
n_procs: Optional[int] = None,
use_cuda: bool = False,
initrand: bool = False,
caps_name: Optional[str] = None,
) -> None:
"""Preprocessing of raw DWI datasets using a phase difference image.
Expand All @@ -56,6 +58,7 @@ def cli(
base_dir=working_directory,
parameters=parameters,
name=pipeline_name,
caps_name=caps_name,
)

exec_pipeline = (
Expand Down
3 changes: 3 additions & 0 deletions clinica/pipelines/dwi/preprocessing/t1/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
@cli_param.option.initrand
@cli_param.option.delete_cache
@cli_param.option.random_seed
@cli_param.option.caps_name
def cli(
bids_directory: str,
caps_directory: str,
Expand All @@ -36,6 +37,7 @@ def cli(
initrand: bool = False,
delete_cache: bool = False,
random_seed: Optional[int] = None,
caps_name: Optional[str] = None,
) -> None:
"""Preprocessing of raw DWI datasets using a T1w image.
Expand Down Expand Up @@ -63,6 +65,7 @@ def cli(
base_dir=working_directory,
parameters=parameters,
name=pipeline_name,
caps_name=caps_name,
)

exec_pipeline = (
Expand Down
35 changes: 34 additions & 1 deletion clinica/pipelines/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ def __init__(
parameters: Optional[dict] = None,
name: Optional[str] = None,
ignore_dependencies: Optional[List[str]] = None,
caps_name: Optional[str] = None,
):
"""Init a Pipeline object.
Expand Down Expand Up @@ -423,6 +424,11 @@ def __init__(
from pathlib import Path
from tempfile import mkdtemp

from clinica.utils.caps import (
build_caps_dataset_description,
write_caps_dataset_description,
)
from clinica.utils.exceptions import ClinicaCAPSError
from clinica.utils.inputs import check_bids_folder, check_caps_folder

self._is_built: bool = False
Expand Down Expand Up @@ -454,18 +460,45 @@ def __init__(
self._name = name or self.__class__.__name__
self._parameters = parameters or {}
self._ignore_dependencies = ignore_dependencies or []
self.caps_name = caps_name

if not self._bids_directory:
if not self._caps_directory:
raise RuntimeError(
f"The {self._name} pipeline does not contain "
"BIDS nor CAPS directory at the initialization."
)
check_caps_folder(self._caps_directory)
try:
check_caps_folder(self._caps_directory)
except ClinicaCAPSError as e:
desc = build_caps_dataset_description(
input_dir=self._caps_directory,
output_dir=self._caps_directory,
processing_name=self._name,
dataset_name=self.caps_name,
)
raise ClinicaCAPSError(
f"{e}\nYou might want to create a 'dataset_description.json' "
f"file with the following content:\n{desc}"
)
self.is_bids_dir = False
else:
check_bids_folder(self._bids_directory)
self.is_bids_dir = True
if self._caps_directory is not None:
if (
not self._caps_directory.exists()
or len([f for f in self._caps_directory.iterdir()]) == 0
):
self._caps_directory.mkdir(parents=True, exist_ok=True)
if self._caps_directory:
write_caps_dataset_description(
input_dir=self.input_dir,
output_dir=self._caps_directory,
processing_name=self._name,
dataset_name=self.caps_name,
)
check_caps_folder(self._caps_directory)
self._compute_subjects_and_sessions()
self._init_nodes()

Expand Down
2 changes: 2 additions & 0 deletions clinica/pipelines/t1_linear/anat_linear_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(
name: Optional[str] = None,
ignore_dependencies: Optional[List[str]] = None,
use_antspy: bool = False,
caps_name: Optional[str] = None,
):
from clinica.utils.stream import cprint

Expand All @@ -47,6 +48,7 @@ def __init__(
parameters=parameters,
ignore_dependencies=ignore_dependencies,
name=name,
caps_name=caps_name,
)
self.use_antspy = use_antspy
if self.use_antspy:
Expand Down
3 changes: 3 additions & 0 deletions clinica/pipelines/t1_linear/flair_linear_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
is_flag=True,
help="Use ANTsPy instead of ANTs.",
)
@cli_param.option.caps_name
def cli(
bids_directory: str,
caps_directory: str,
Expand All @@ -40,6 +41,7 @@ def cli(
working_directory: Optional[str] = None,
n_procs: Optional[int] = None,
use_antspy: bool = False,
caps_name: Optional[str] = None,
) -> None:
"""Affine registration of Flair images to the MNI standard space.
Expand All @@ -66,6 +68,7 @@ def cli(
parameters=parameters,
name=pipeline_name,
use_antspy=use_antspy,
caps_name=caps_name,
)

exec_pipeline = (
Expand Down
3 changes: 3 additions & 0 deletions clinica/pipelines/t1_linear/t1_linear_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
is_flag=True,
help="Use ANTsPy instead of ANTs.",
)
@cli_param.option.caps_name
def cli(
bids_directory: str,
caps_directory: str,
Expand All @@ -40,6 +41,7 @@ def cli(
working_directory: Optional[str] = None,
n_procs: Optional[int] = None,
use_antspy: bool = False,
caps_name: Optional[str] = None,
) -> None:
"""Affine registration of T1w images to the MNI standard space.
Expand All @@ -66,6 +68,7 @@ def cli(
parameters=parameters,
name=pipeline_name,
use_antspy=use_antspy,
caps_name=caps_name,
)

exec_pipeline = (
Expand Down
6 changes: 6 additions & 0 deletions clinica/pipelines/t1_volume/t1_volume_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
@option.global_option_group
@option.n_procs
@click.pass_context
@cli_param.option.caps_name
def cli(
ctx: click.Context,
bids_directory: str,
Expand All @@ -50,6 +51,7 @@ def cli(
working_directory: Optional[str] = None,
n_procs: Optional[int] = None,
yes: bool = False,
caps_name: Optional[str] = None,
) -> None:
"""Volume-based processing of T1-weighted MR images.
Expand Down Expand Up @@ -103,6 +105,7 @@ def cli(
working_directory=working_directory,
n_procs=n_procs,
yes=yes,
caps_name=caps_name,
)

cprint("Part 2/4: Running t1-volume-create-dartel pipeline.")
Expand All @@ -115,6 +118,7 @@ def cli(
subjects_sessions_tsv=subjects_sessions_tsv,
working_directory=working_directory,
n_procs=n_procs,
caps_name=caps_name,
)

cprint("Part 3/4: Running t1-volume-dartel2mni pipeline.")
Expand All @@ -130,6 +134,7 @@ def cli(
subjects_sessions_tsv=subjects_sessions_tsv,
working_directory=working_directory,
n_procs=n_procs,
caps_name=caps_name,
)

cprint("Part 4/4: Running t1-volume-parcellation pipeline.")
Expand All @@ -140,6 +145,7 @@ def cli(
subjects_sessions_tsv=subjects_sessions_tsv,
working_directory=working_directory,
n_procs=n_procs,
caps_name=caps_name,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
@option.n_procs
@cli_param.option_group.advanced_pipeline_options
@cli_param.option.dartel_tissues
@cli_param.option.caps_name
def cli(
bids_directory: str,
caps_directory: str,
Expand All @@ -29,6 +30,7 @@ def cli(
subjects_sessions_tsv: Optional[str] = None,
working_directory: Optional[str] = None,
n_procs: Optional[int] = None,
caps_name: Optional[str] = None,
) -> None:
"""Inter-subject registration using Dartel (creating a new Dartel template).
Expand All @@ -51,6 +53,7 @@ def cli(
base_dir=working_directory,
parameters=parameters,
name=pipeline_name,
caps_name=caps_name,
)

exec_pipeline = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
@cli_param.option.tissues
@cli_param.option.modulate
@cli_param.option.voxel_size
@cli_param.option.caps_name
def cli(
bids_directory: str,
caps_directory: str,
Expand All @@ -36,6 +37,7 @@ def cli(
subjects_sessions_tsv: Optional[str] = None,
working_directory: Optional[str] = None,
n_procs: Optional[int] = None,
caps_name: Optional[str] = None,
) -> None:
"""Register DARTEL template to MNI space.
Expand Down Expand Up @@ -64,6 +66,7 @@ def cli(
base_dir=working_directory,
parameters=parameters,
name=pipeline_name,
caps_name=caps_name,
)

exec_pipeline = (
Expand Down
Loading

0 comments on commit 8bd07fc

Please sign in to comment.