kirby-to-bids first implementation

aramis-lab · Sep 18, 2024 · a72bf66 · a72bf66
1 parent 073e68c
commit a72bf66
Show file tree

Hide file tree

Showing 7 changed files with 242 additions and 0 deletions.
diff --git a/clinica/iotools/bids_utils.py b/clinica/iotools/bids_utils.py
@@ -26,6 +26,7 @@ class StudyName(str, Enum):
     OASIS3 = "OASIS3"
     UKB = "UKB"
     IXI = "IXI"
+    KIRBY = "KIRBY"
 
 
 BIDS_VALIDATOR_CONFIG = {
@@ -93,6 +94,8 @@ def bids_id_factory(study: StudyName) -> Type[BIDSSubjectID]:
         return HABSBIDSSubjectID
     if study == StudyName.IXI:
         return IXIBIDSSubjectID
+    if study == StudyName.KIRBY:
+        return KIRBYBIDSSubjectID
 
 
 class ADNIBIDSSubjectID(BIDSSubjectID):
@@ -319,6 +322,30 @@ def from_original_study_id(cls, study_id: str) -> str:
     def to_original_study_id(self) -> str:
         return str(self.replace("sub-", ""))
 
+class KIRBYBIDSSubjectID(BIDSSubjectID):
+    """Implementation for KIRBY of the BIDSSubjectIDClass.
+
+    Allows going from the source id ``KKI###`` to a BIDS id ``sub-KKI###``
+    and reciprocally, where ``###`` stands for exactly three digits.
+    """
+
+    def validate(self, value: str) -> str:
+        """Return ``value`` unchanged when it matches ``sub-KKI###``.
+
+        Raises
+        ------
+        ValueError
+            If ``value`` is not 'sub-KKI' followed by exactly three digits.
+        """
+        if re.fullmatch(r"sub-KKI\d{3}", value):
+            return value
+        raise ValueError(
+            f"BIDS KKI subject ID {value} is not properly formatted. "
+            "Expecting a 'sub-KKI###' format."
+        )
+
+    @classmethod
+    def from_original_study_id(cls, study_id: str) -> str:
+        """Build the BIDS id by prefixing a raw ``KKI###`` id with 'sub-'.
+
+        Raises
+        ------
+        ValueError
+            If ``study_id`` is not 'KKI' followed by exactly three digits.
+        """
+        if re.fullmatch(r"KKI\d{3}", study_id):
+            return f"sub-{study_id}"
+        raise ValueError(
+            f"Raw KIRBY subject ID {study_id} is not properly formatted. "
+            "Expecting a 'KKI###' format."
+        )
+
+    def to_original_study_id(self) -> str:
+        """Return the raw study id, i.e. this id with 'sub-' removed."""
+        return str(self.replace("sub-", ""))
+
+
 
 # -- Methods for the clinical data --
 def create_participants_df(

diff --git a/clinica/iotools/converters/cli.py b/clinica/iotools/converters/cli.py
@@ -9,6 +9,7 @@
 from .oasis3_to_bids import oasis3_to_bids_cli
 from .oasis_to_bids import oasis_to_bids_cli
 from .ukb_to_bids import ukb_to_bids_cli
+from .kirby_to_bids import kirby_to_bids_cli
 
 
 @click.group("convert")
@@ -26,6 +27,7 @@ def cli() -> None:
 cli.add_command(ukb_to_bids_cli.cli)
 cli.add_command(genfi_to_bids_cli.cli)
 cli.add_command(ixi_to_bids_cli.cli)
+cli.add_command(kirby_to_bids_cli.cli)
 
 if __name__ == "__main__":
     cli()
diff --git a/clinica/iotools/converters/factory.py b/clinica/iotools/converters/factory.py
@@ -62,4 +62,6 @@ def converter_factory(study: Union[str, StudyName]) -> Callable:
         from .ukb_to_bids import convert
     if study == StudyName.IXI:
         from .ixi_to_bids import convert
+    if study == StudyName.KIRBY:
+        from .kirby_to_bids import convert
     return convert
diff --git a/clinica/iotools/converters/kirby_to_bids/__init__.py b/clinica/iotools/converters/kirby_to_bids/__init__.py
@@ -0,0 +1,3 @@
+from .kirby_to_bids import convert
+
+__all__ = ["convert"]
diff --git a/clinica/iotools/converters/kirby_to_bids/kirby_to_bids.py b/clinica/iotools/converters/kirby_to_bids/kirby_to_bids.py
@@ -0,0 +1,120 @@
+"""Convert KIRBY dataset (https://www.nitrc.org/projects/multimodal) to BIDS."""
+
+from pathlib import Path
+from typing import Optional
+
+import nibabel as nb
+import numpy as np
+import csv
+
+from clinica.iotools.bids_utils import write_modality_agnostic_files
+from clinica.iotools.converters.kirby_to_bids.kirby_to_bids_utils import (
+  create_bids_structure,
+  find_nii_files,
+  normalize_dashes,
+  replace_dashes_with_underscore,
+)
+from clinica.utils.filemanip import UserProvidedPath
+
+__all__ = ["convert"]
+
+
+import os
+import pandas as pd
+
+
+def convert(
+    path_to_dataset: UserProvidedPath,
+    bids_dir: UserProvidedPath,
+    path_to_clinical: UserProvidedPath,
+    subjects: Optional[UserProvidedPath] = None,
+    n_procs: Optional[int] = 1,
+    **kwargs,
+):
+    """Convert the KIRBY MPRAGE images and clinical data to a BIDS layout.
+
+    Parameters
+    ----------
+    path_to_dataset : folder searched recursively for '.nii' files whose
+        name contains 'MPRAGE'.
+    bids_dir : output folder receiving the images, 'participants.tsv' and
+        one '<subject>_sessions.tsv' per subject.
+    path_to_clinical : folder containing the clinical '.xlsx' workbook.
+    subjects, n_procs, kwargs : accepted for interface compatibility but
+        not used by this implementation.
+
+    Raises
+    ------
+    FileNotFoundError
+        If no '.xlsx' file is found in ``path_to_clinical``.
+    """
+    # Pick the first Excel workbook of the clinical folder; sorting makes the
+    # choice deterministic when several workbooks are present.
+    clinical_data_file = next(
+        (
+            os.path.join(path_to_clinical, name)
+            for name in sorted(os.listdir(path_to_clinical))
+            if name.endswith('.xlsx')
+        ),
+        None,
+    )
+    if clinical_data_file is None:
+        raise FileNotFoundError(
+            f"No clinical data .xlsx file found in {path_to_clinical}"
+        )
+
+    clinical_data = pd.read_excel(clinical_data_file)
+
+    # Keep only the columns used below. The explicit copy makes the column
+    # assignment that follows operate on an independent frame instead of a
+    # selection of `clinical_data` (avoids pandas' SettingWithCopyWarning).
+    columns = ['MPRAGE', 'Age', 'Sex', 'Fiducial', 'Subject ID', 'Visit ID']
+    clinical_data_filtered = clinical_data[columns].copy()
+    # Normalize the MPRAGE identifiers the same way file names are normalized
+    # below, so both sides can be compared for equality.
+    clinical_data_filtered['MPRAGE'] = (
+        clinical_data_filtered['MPRAGE']
+        .apply(replace_dashes_with_underscore)
+        .str.strip()
+    )
+    clinical_data_filtered.reset_index(drop=True, inplace=True)
+
+    participants_data = {}
+
+    for file_path in find_nii_files(path_to_dataset):
+        # Drop only the trailing extension, then normalize like the MPRAGE column.
+        stem = os.path.splitext(os.path.basename(file_path))[0].strip()
+        file_normalized = replace_dashes_with_underscore(stem)
+
+        if 'MPRAGE' not in file_normalized:
+            continue
+
+        # Find the exact match in the clinical data's MPRAGE column.
+        matches = clinical_data_filtered[
+            clinical_data_filtered['MPRAGE'] == file_normalized
+        ]
+        if matches.empty:
+            print(f"No matching clinical data found for file: {file_normalized}")
+            continue
+
+        # Use the first matching row in case several rows match.
+        clinical_row = matches.iloc[0]
+        subject_id = clinical_row['Subject ID']
+        session_id = clinical_row['Visit ID']
+
+        # Create the BIDS folders and copy the image into them.
+        create_bids_structure(subject_id, session_id, file_path, bids_dir)
+
+        # Only the first image seen for a subject defines its participant row.
+        if subject_id not in participants_data:
+            participants_data[subject_id] = {
+                'participant_id': f"sub-KKI{subject_id}",
+                'sex': clinical_row['Sex'],
+                'age': clinical_row['Age'],
+                # NOTE(review): handedness is read from the 'Fiducial' column;
+                # confirm this is the intended source column.
+                'handedness': clinical_row['Fiducial'],
+            }
+
+    # The output folder may not exist yet when no image was converted.
+    os.makedirs(bids_dir, exist_ok=True)
+
+    # BIDS expects a tab-separated 'participants.tsv' at the dataset root.
+    participants_tsv = os.path.join(bids_dir, 'participants.tsv')
+    with open(participants_tsv, 'w', newline='') as participants_file:
+        participants_writer = csv.writer(participants_file, delimiter='\t')
+        participants_writer.writerow(['participant_id', 'sex', 'age', 'handedness'])
+        participants_writer.writerows(
+            [info['participant_id'], info['sex'], info['age'], info['handedness']]
+            for info in participants_data.values()
+        )
+
+    # Write one sessions file per subject listed in the clinical data.
+    # NOTE(review): this also creates folders for subjects without any
+    # converted image; confirm whether that is desired.
+    for subject_id, sessions in clinical_data_filtered.groupby('Subject ID'):
+        bids_id = f"sub-KKI{subject_id}"
+        subject_dir = os.path.join(bids_dir, bids_id)
+        os.makedirs(subject_dir, exist_ok=True)
+
+        # BIDS names the sessions file 'sub-<label>_sessions.tsv'.
+        sessions_file = os.path.join(subject_dir, f"{bids_id}_sessions.tsv")
+        with open(sessions_file, 'w', newline='') as session_file:
+            session_writer = csv.writer(session_file, delimiter='\t')
+            session_writer.writerow(['session_id', 'age'])
+            for _, row in sessions.iterrows():
+                session_writer.writerow([f"ses-{row['Visit ID']}", row['Age']])
+
+    print(f"BIDS conversion completed using the clinical data from {clinical_data_file}.")
diff --git a/clinica/iotools/converters/kirby_to_bids/kirby_to_bids_cli.py b/clinica/iotools/converters/kirby_to_bids/kirby_to_bids_cli.py
@@ -0,0 +1,27 @@
+from os import PathLike
+from typing import Optional
+
+import click
+
+from clinica.iotools.converters import cli_param
+
+
+@click.command(name="kirby-to-bids")
+@cli_param.dataset_directory
+@cli_param.bids_directory
+@cli_param.clinical_data_directory
+@cli_param.subjects_list
+def cli(
+    dataset_directory: PathLike,
+    bids_directory: PathLike,
+    clinical_data_directory: PathLike,
+    subjects_list: Optional[PathLike] = None,
+) -> None:
+    """KIRBY to BIDS converter."""
+    # Imported here rather than at module level, so the converter module is
+    # only loaded when the command actually runs.
+    from .kirby_to_bids import convert
+
+    # Forward the command-line values to the converter by parameter name.
+    convert(
+        path_to_dataset=dataset_directory,
+        bids_dir=bids_directory,
+        path_to_clinical=clinical_data_directory,
+        subjects=subjects_list,
+    )
+
+
+if __name__ == "__main__":
+    cli()
diff --git a/clinica/iotools/converters/kirby_to_bids/kirby_to_bids_utils.py b/clinica/iotools/converters/kirby_to_bids/kirby_to_bids_utils.py
@@ -0,0 +1,61 @@
+import json
+import re
+import shutil
+from enum import Enum
+from pathlib import Path
+from typing import List, Optional, Union
+import os
+
+import nibabel as nib
+import pandas as pd
+from nilearn.image import concat_imgs
+
+from clinica.iotools.bids_utils import StudyName, bids_id_factory
+from clinica.utils.stream import cprint, log_and_raise
+
+# Public API of this module: only names that are actually defined below, so
+# that a star-import of the module does not fail on missing attributes.
+__all__ = [
+    "create_bids_structure",
+    "find_nii_files",
+    "normalize_dashes",
+    "replace_dashes_with_underscore",
+]
+
+
+# Helper function to create BIDS folders and copy an image into them
+def create_bids_structure(subject_id, session_id, input_file, output_dir):
+    """Copy ``input_file`` into the BIDS 'anat' folder of a subject/session.
+
+    The destination is
+    ``<output_dir>/sub-KKI<subject_id>/ses-<session_id>/anat/
+    sub-KKI<subject_id>_ses-<session_id>_T1w.nii.gz``; missing folders are
+    created. Because the destination carries a '.nii.gz' name, an input that
+    is not already gzip-named ('.gz') is compressed while being copied, so the
+    written file is a valid gzip archive. The source file is left in place.
+    """
+    import gzip
+
+    sub_id = f"sub-KKI{subject_id}"
+    ses_id = f"ses-{session_id}"
+
+    # Create output directory for this subject/session
+    anat_dir = os.path.join(output_dir, sub_id, ses_id, 'anat')
+    os.makedirs(anat_dir, exist_ok=True)
+
+    # Destination filename in BIDS format
+    destination = os.path.join(anat_dir, f"{sub_id}_{ses_id}_T1w.nii.gz")
+
+    if str(input_file).endswith('.gz'):
+        # Already compressed: a plain copy keeps the content valid.
+        shutil.copy(input_file, destination)
+    else:
+        # Uncompressed image: gzip it so the content matches the '.gz' name.
+        with open(input_file, 'rb') as source, gzip.open(destination, 'wb') as target:
+            shutil.copyfileobj(source, target)
+
+# Recursively collect the MPRAGE '.nii' files located under a directory
+def find_nii_files(directory):
+    """Return the paths of all files under ``directory`` (searched
+    recursively) whose name ends with '.nii' and contains 'MPRAGE'."""
+    return [
+        os.path.join(parent, name)
+        for parent, _, names in os.walk(directory)
+        for name in names
+        if name.endswith('.nii') and 'MPRAGE' in name
+    ]
+
+# Map dash-like characters to the standard hyphen '-'
+def normalize_dashes(text):
+    """Replace en dash (U+2013), em dash (U+2014) and minus sign (U+2212)
+    with '-' in a string; any non-string value is returned unchanged."""
+    if not isinstance(text, str):
+        return text
+    return re.sub(r'[\u2013\u2014\u2212]', '-', text)
+
+# Turn every hyphen of a string into an underscore
+def replace_dashes_with_underscore(text):
+    """Return ``text`` with each '-' replaced by '_'; any non-string value
+    is returned unchanged."""
+    return text.replace('-', '_') if isinstance(text, str) else text
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .kirby_to_bids import convert

		__all__ = ["convert"]