diff --git a/clinica/iotools/bids_utils.py b/clinica/iotools/bids_utils.py index 848efb33f..449a3e1aa 100644 --- a/clinica/iotools/bids_utils.py +++ b/clinica/iotools/bids_utils.py @@ -2,9 +2,12 @@ import json import os +import re +from abc import ABC, abstractmethod +from collections import UserString from enum import Enum from pathlib import Path -from typing import BinaryIO, List, Optional, Union +from typing import BinaryIO, List, Optional, Type, Union import pandas as pd @@ -49,6 +52,246 @@ class StudyName(str, Enum): } +class BIDSSubjectID(ABC, UserString): + """This is the interface that BIDS subject IDs have to implement.""" + + def __init__(self, value: str): + instance = super().__init__(self.validate(value)) + return instance + + @abstractmethod + def validate(self, value: str) -> str: + raise NotImplementedError + + @classmethod + @abstractmethod + def from_original_study_id(cls, study_id: str) -> str: + raise NotImplementedError + + @abstractmethod + def to_original_study_id(self) -> str: + raise NotImplementedError + + +def bids_id_factory(study: StudyName) -> Type[BIDSSubjectID]: + if study == StudyName.ADNI: + return ADNIBIDSSubjectID + if study == StudyName.NIFD: + return NIFDBIDSSubjectID + if study == StudyName.AIBL: + return AIBLBIDSSubjectID + if study == StudyName.UKB: + return UKBBIDSSubjectID + if study == StudyName.GENFI: + return GENFIBIDSSubjectID + if study == StudyName.OASIS: + return OASISBIDSSubjectID + if study == StudyName.OASIS3: + return OASIS3BIDSSubjectID + if study == StudyName.HABS: + return HABSBIDSSubjectID + + +class ADNIBIDSSubjectID(BIDSSubjectID): + """Implementation for ADNI of the BIDSSubjectIDClass, allowing to go from the source id XXX_S_XXXX + to a bids id sub-ADNIXXXSXXX and reciprocally.""" + + def validate(self, value: str) -> str: + if re.fullmatch(r"sub-ADNI\d{3}S\d{4}", value): + return value + raise ValueError( + f"BIDS ADNI subject ID {value} is not properly formatted. 
" + "Expecting a 'sub-ADNIXXXSXXXX' format." + ) + + @classmethod + def from_original_study_id(cls, study_id: str) -> str: + if re.fullmatch(r"\d{3}_S_\d{4}", study_id): + return "sub-ADNI" + study_id.replace("_", "") + raise ValueError( + f"Raw ADNI subject ID {study_id} is not properly formatted. " + "Expecting a 'XXX_S_XXXX' format." + ) + + def to_original_study_id(self) -> str: + return "_S_".join(self.split("ADNI")[1].split("S")) + + +class NIFDBIDSSubjectID(BIDSSubjectID): + """Implementation for NIFD of the BIDSSubjectIDClass, allowing to go from the source id X_S_XXXX + to a bids id sub-NIFDXSXXX and reciprocally.""" + + def validate(self, value: str) -> str: + if re.fullmatch(r"sub-NIFD\dS\d{4}", value): + return value + raise ValueError( + f"BIDS NIFD subject ID {value} is not properly formatted. " + "Expecting a 'sub-NIFDXSXXXX' format." + ) + + @classmethod + def from_original_study_id(cls, study_id: str) -> str: + if re.fullmatch(r"\d_S_\d{4}", study_id): + return "sub-NIFD" + study_id.replace("_", "") + raise ValueError( + f"Raw NIFD subject ID {study_id} is not properly formatted. " + "Expecting a 'X_S_XXXX' format." + ) + + def to_original_study_id(self) -> str: + return "_S_".join(self.split("NIFD")[1].split("S")) + + +class AIBLBIDSSubjectID(BIDSSubjectID): + """Implementation for AIBL of the BIDSSubjectIDClass, allowing to go from the source id Y + to a bids id sub-ADNIY and reciprocally.""" + + def validate(self, value: str) -> str: + if re.fullmatch(r"sub-AIBL\d*", value): + return value + raise ValueError( + f"BIDS AIBL subject ID {value} is not properly formatted. " + "Expecting a 'sub-AIBLY' format." + ) + + @classmethod + def from_original_study_id(cls, study_id: str) -> str: + if re.fullmatch(r"\d*", study_id): + return "sub-AIBL" + study_id + raise ValueError( + f"Raw AIBL subject ID {study_id} is not properly formatted. " + "Expecting a 'Y' format where Y is a combination of digits." 
+ ) + + def to_original_study_id(self) -> str: + return self.split("AIBL")[1] + + +class UKBBIDSSubjectID(BIDSSubjectID): + """Implementation for UKB of the BIDSSubjectIDClass, allowing to go from the source id Y + to a bids id sub-ADNIY and reciprocally.""" + + def validate(self, value: str) -> str: + if re.fullmatch(r"sub-UKB\d*", value): + return value + raise ValueError( + f"BIDS UKB subject ID {value} is not properly formatted. " + "Expecting a 'sub-UKBY' format." + ) + + @classmethod + def from_original_study_id(cls, study_id: str) -> str: + if re.fullmatch(r"\d*", study_id): + return "sub-UKB" + study_id + raise ValueError( + f"Raw UKB subject ID {study_id} is not properly formatted. " + "Expecting a 'Y' format where Y is a combination of digits." + ) + + def to_original_study_id(self) -> str: + return self.split("UKB")[1] + + +class GENFIBIDSSubjectID(BIDSSubjectID): + """Implementation for GENFI of the BIDSSubjectIDClass, allowing to go from the source id Y + to a bids id sub-Y and reciprocally.""" + + def validate(self, value: str) -> str: + if re.fullmatch(r"sub-\w*", value): + return value + raise ValueError( + f"BIDS GENFI subject ID {value} is not properly formatted. " + "Expecting a 'sub-Y' format." + ) + + @classmethod + def from_original_study_id(cls, study_id: str) -> str: + if re.fullmatch(r"\w*", study_id): + return "sub-" + study_id + raise ValueError( + f"Raw GENFI subject ID {study_id} is not properly formatted. " + "Expecting a 'Y' format where Y is a combination of letters and digits." 
+ ) + + def to_original_study_id(self) -> str: + return self.split("-")[1] + + +class OASISBIDSSubjectID(BIDSSubjectID): + """Implementation for OASIS1 of the BIDSSubjectIDClass, allowing to go from the source id OAS1_XXXX_MR1/2 + to a bids id sub-OASIS1XXXX and reciprocally.""" + + def validate(self, value: str) -> str: + if re.fullmatch(r"sub-OASIS1\d{4}", value): + return value + raise ValueError( + f"BIDS OASIS1 subject ID {value} is not properly formatted. " + "Expecting a 'sub-OASIS1XXXX' format." + ) + + @classmethod + def from_original_study_id(cls, study_id: str) -> str: + if re.fullmatch(r"OAS1_\d{4}_MR\d", study_id): + return "sub-OASIS1" + study_id.split("_")[1] + raise ValueError( + f"Raw OASIS1 subject ID {study_id} is not properly formatted. " + "Expecting a 'OAS1_XXXX_MR1/2' format." + ) + + def to_original_study_id(self) -> str: + return "OAS1" + self.split("OASIS1")[1] + "MR1" + + +class OASIS3BIDSSubjectID(BIDSSubjectID): + """Implementation for OASIS3 of the BIDSSubjectIDClass, allowing to go from the source id XXXX + to a bids id sub-OAS3XXXX and reciprocally.""" + + def validate(self, value: str) -> str: + if re.fullmatch(r"sub-OAS3\d{4}", value): + return value + raise ValueError( + f"BIDS OASIS3 subject ID {value} is not properly formatted. " + "Expecting a 'sub-OAS3XXXX' format." + ) + + @classmethod + def from_original_study_id(cls, study_id: str) -> str: + if re.fullmatch(r"OAS3\d{4}", study_id): + return "sub-" + study_id + raise ValueError( + f"Raw OASIS3 subject ID {study_id} is not properly formatted. " + "Expecting a 'OAS3XXXX' format." 
+ ) + + def to_original_study_id(self) -> str: + return self.split("-")[1] + + +class HABSBIDSSubjectID(BIDSSubjectID): + """Implementation for HABS of the BIDSSubjectIDClass, allowing to go from the source id P_Y + to a bids id sub-HABSY and reciprocally.""" + + def validate(self, value: str) -> str: + if re.fullmatch(r"sub-HABS\w*", value): + return value + raise ValueError( + f"BIDS HABS subject ID {value} is not properly formatted. " + "Expecting a 'sub-HABSY' format." + ) + + @classmethod + def from_original_study_id(cls, study_id: str) -> str: + if re.fullmatch(r"P_\w*", study_id): + return study_id.replace("P_", "sub-HABS") + raise ValueError( + f"Raw HABS subject ID {study_id} is not properly formatted. " + "Expecting a 'P_Y' format." + ) + + def to_original_study_id(self) -> str: + return str(self.replace("sub-HABS", "P_")) + + # -- Methods for the clinical data -- def create_participants_df( study_name: StudyName, @@ -166,15 +409,10 @@ def create_participants_df( # Adding participant_id column with BIDS ids for i in range(0, len(participant_df)): - if study_name == StudyName.OASIS: - value = (participant_df["alternative_id_1"][i].split("_"))[1] - elif study_name == StudyName.OASIS3: - value = participant_df["alternative_id_1"][i].replace("OAS3", "") - else: - value = remove_space_and_symbols(participant_df["alternative_id_1"][i]) - + value = bids_id_factory(study_name).from_original_study_id( + participant_df["alternative_id_1"][i] + ) bids_id = [s for s in bids_ids if value in s] - if len(bids_id) == 0: index_to_drop.append(i) subjects_to_drop.append(value) @@ -289,11 +527,7 @@ def create_sessions_dict_oasis( if subj_id.dtype == np.int64: subj_id = str(subj_id) # Removes all the - from - subj_id_alpha = remove_space_and_symbols(subj_id) - if study_name == StudyName.OASIS: - subj_id_alpha = str(subj_id[0:3] + "IS" + subj_id[3] + subj_id[5:9]) - if study_name == StudyName.OASIS3: - subj_id_alpha = str(subj_id[0:3] + "IS" + subj_id[3:]) + subj_id_alpha = 
str(subj_id[0:3] + "IS" + subj_id[3] + subj_id[5:9]) # Extract the corresponding BIDS id and create the output file if doesn't exist subj_bids = [s for s in bids_ids if subj_id_alpha in s] diff --git a/clinica/iotools/converters/adni_to_bids/adni_json.py b/clinica/iotools/converters/adni_to_bids/adni_json.py index a670504f2..bcf90d074 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_json.py +++ b/clinica/iotools/converters/adni_to_bids/adni_json.py @@ -31,12 +31,12 @@ def _bids_id_to_loni(bids_id: str) -> Optional[str]: """Convert a subject id of the form sub-ADNI000S0000 back to original format 000_S_0000 """ - import re + from clinica.iotools.bids_utils import StudyName, bids_id_factory - ids = re.findall(r"\d+", bids_id) - if len(ids) == 2: - return ids[0] + "_S_" + ids[1] - return None + try: + return bids_id_factory(StudyName.ADNI)(bids_id).to_original_study_id() + except ValueError: + return None def _read_xml_files( diff --git a/clinica/iotools/converters/adni_to_bids/adni_to_bids.py b/clinica/iotools/converters/adni_to_bids/adni_to_bids.py index 2010acbee..9241c26c1 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_to_bids.py +++ b/clinica/iotools/converters/adni_to_bids/adni_to_bids.py @@ -235,6 +235,8 @@ def _get_bids_subjects_info( out_path: Path, subjects: Optional[Path] = None, ) -> tuple[list[str], list[Path]]: + from clinica.iotools.bids_utils import StudyName, bids_id_factory + from .adni_utils import load_clinical_csv # Read optional list of participants. @@ -246,7 +248,9 @@ def _get_bids_subjects_info( # Filter participants if requested. participants = sorted(participants & subjects if subjects else participants) # Compute their corresponding BIDS IDs and paths. 
- bids_ids = [f"sub-ADNI{p.replace('_', '')}" for p in participants] + bids_ids = [ + bids_id_factory(StudyName.ADNI).from_original_study_id(p) for p in participants + ] bids_paths = [out_path / bids_id for bids_id in bids_ids] return bids_ids, bids_paths diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index 4127bef8c..e42894f36 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -183,8 +183,6 @@ def _write_adni_sessions_tsv( df_subj_sessions: global dataframe containing clinical sessions data for all subjects bids_subjs_paths: a list with the path to all bids subjects """ - import os - from os import path df_subj_sessions["adas_memory"] = ( df_subj_sessions["adas_Q1"] @@ -268,6 +266,7 @@ def _filter_subj_bids( # Depending on the file that needs to be open, identify and # preprocess the column that contains the subjects ids. + # todo : use id class here ? 
bids_ids = [x[8:] for x in bids_ids if "sub-ADNI" in x] if location == "ADNIMERGE.csv": df_files["RID"] = df_files["PTID"].apply( @@ -529,6 +528,7 @@ def create_adni_scans_files(conversion_path: Path, bids_subjs_paths: list[Path]) """ from os import path + from clinica.iotools.bids_utils import StudyName, bids_id_factory from clinica.utils.stream import cprint scans_fields_bids = ["filename", "scan_id", "mri_field"] @@ -552,7 +552,7 @@ def create_adni_scans_files(conversion_path: Path, bids_subjs_paths: list[Path]) for bids_subject_path in bids_subjs_paths: # Create the file bids_id = bids_subject_path.resolve().name - subject_id = "_S_".join(bids_id[8::].split("S")) + subject_id = bids_id_factory(StudyName.ADNI)(bids_id).to_original_study_id() for session_path in bids_subject_path.glob("ses-*"): viscode = _session_label_to_viscode(session_path.name[4::]) tsv_name = f"{bids_id}_{session_path.name}_scans.tsv" @@ -768,7 +768,7 @@ def _create_file( import numpy as np from clinica.cmdline import setup_clinica_logging - from clinica.iotools.bids_utils import run_dcm2niix + from clinica.iotools.bids_utils import StudyName, bids_id_factory, run_dcm2niix from clinica.iotools.converter_utils import viscode_to_session from clinica.iotools.utils.data_handling import center_nifti_origin from clinica.utils.stream import cprint @@ -805,12 +805,10 @@ def _create_file( # If the original image is a DICOM, check if contains two DICOM inside the same folder if image.Is_Dicom: image_path = _check_two_dcm_folder(image_path, bids_dir, image_id) - bids_subj = subject.replace("_", "") - output_path = ( - bids_dir / f"sub-ADNI{bids_subj}" / session / _get_output_path(modality) - ) + bids_id = bids_id_factory(StudyName.ADNI).from_original_study_id(subject) + output_path = bids_dir / bids_id / session / _get_output_path(modality) output_filename = ( - f"sub-ADNI{bids_subj}_{session}{_get_output_filename(modality, image_tracer)}" + f"{bids_id}_{session}{_get_output_filename(modality, 
image_tracer)}" ) output_path.mkdir(parents=True, exist_ok=True) diff --git a/clinica/iotools/converters/aibl_to_bids/utils/bids.py b/clinica/iotools/converters/aibl_to_bids/utils/bids.py index 8d02c3255..88cfc4327 100644 --- a/clinica/iotools/converters/aibl_to_bids/utils/bids.py +++ b/clinica/iotools/converters/aibl_to_bids/utils/bids.py @@ -630,12 +630,14 @@ def _create_file( Path or None : Path to file """ - from clinica.iotools.bids_utils import json_from_dcm + from clinica.iotools.bids_utils import StudyName, bids_id_factory, json_from_dcm from clinica.iotools.converter_utils import viscode_to_session from clinica.iotools.utils.data_handling import center_nifti_origin from clinica.utils.stream import cprint - participant_id = f"sub-AIBL{image.Subjects_ID}" + participant_id = bids_id_factory(StudyName.AIBL).from_original_study_id( + image.Subjects_ID + ) session_id = image.Session_ID image_path = image[modality.name_of_path] diff --git a/clinica/iotools/converters/aibl_to_bids/utils/clinical.py b/clinica/iotools/converters/aibl_to_bids/utils/clinical.py index 639be045c..9411b23b1 100644 --- a/clinica/iotools/converters/aibl_to_bids/utils/clinical.py +++ b/clinica/iotools/converters/aibl_to_bids/utils/clinical.py @@ -41,7 +41,7 @@ def create_participants_tsv_file( import numpy as np - from clinica.iotools.bids_utils import StudyName + from clinica.iotools.bids_utils import StudyName, bids_id_factory fields_bids = ["participant_id"] fields_dataset = [] @@ -111,10 +111,9 @@ def create_participants_tsv_file( participant_df[participant_fields_bids[i]] = pd.Series(field_col_values) # Compute BIDS-compatible participant ID. - participant_df["participant_id"] = ( - f"sub-{StudyName.AIBL.value}" + participant_df["alternative_id_1"] + participant_df["participant_id"] = participant_df["alternative_id_1"].apply( + lambda x: bids_id_factory(StudyName.AIBL).from_original_study_id(x) ) - # Keep year-of-birth only. 
def _compute_participant_id(df: DataFrame) -> DataFrame:
    """Add a 'participant_id' column computed from the 'source_id' column.

    Each source ID is converted with the GENFI BIDS subject ID class, which
    raises a ``ValueError`` for IDs that are not made of word characters only.
    The result is the dataframe returned by ``DataFrame.assign``.
    """
    from clinica.iotools.bids_utils import StudyName, bids_id_factory

    # Resolve the converter once instead of once per row.
    to_bids_id = bids_id_factory(StudyName.GENFI).from_original_study_id
    participant_ids = df.source_id.apply(to_bids_id)
    return df.assign(participant_id=participant_ids)
+++ b/clinica/iotools/converters/habs_to_bids/habs_to_bids.py @@ -3,6 +3,7 @@ import pandas as pd +from clinica.iotools.bids_utils import StudyName, bids_id_factory from clinica.utils.filemanip import UserProvidedPath __all__ = ["convert"] @@ -83,11 +84,6 @@ def convert( ) -def _source_participant_id_to_bids(dataframe: pd.DataFrame) -> pd.Series: - # HABS participant format prefixed with `P_` - return dataframe.source_participant_id.str.replace("P_", "sub-HABS", regex=False) - - def _source_session_id_to_bids(dataframe: pd.DataFrame) -> pd.Series: import re @@ -132,7 +128,11 @@ def _read_clinical_data(path: Path, rename_columns: dict[str, str]) -> pd.DataFr pd.read_csv(path) .rename(columns=rename_columns) .assign(date=lambda df: pd.to_datetime(df.date)) - .assign(participant_id=_source_participant_id_to_bids) + .assign( + participant_id=lambda df: df.source_participant_id.apply( + lambda x: bids_id_factory(StudyName.HABS).from_original_study_id(x) + ) + ) .drop(columns="source_participant_id") .assign(session_id=_source_session_id_to_bids) .drop(columns="source_session_id") @@ -177,7 +177,11 @@ def _parse_imaging_data(paths: list[tuple[str, str]]) -> Optional[pd.DataFrame]: return None # Compute BIDS participant ID, session ID and filename. 
df = ( - df.assign(participant_id=_source_participant_id_to_bids) + df.assign( + participant_id=lambda df: df.source_participant_id.apply( + lambda x: bids_id_factory(StudyName.HABS).from_original_study_id(x) + ) + ) .drop(columns="source_participant_id") .assign(session_id=_source_session_id_to_bids) .drop(columns="source_session_id") diff --git a/clinica/iotools/converters/nifd_to_bids/nifd_utils.py b/clinica/iotools/converters/nifd_to_bids/nifd_utils.py index ba3160f73..3906ef4d9 100644 --- a/clinica/iotools/converters/nifd_to_bids/nifd_utils.py +++ b/clinica/iotools/converters/nifd_to_bids/nifd_utils.py @@ -24,6 +24,8 @@ def _find_clinical_data(clinical_data_directory: Path) -> Optional[pd.DataFrame] def read_clinical_data(clinical_data_directory: Path) -> pd.DataFrame: + from clinica.iotools.bids_utils import StudyName, bids_id_factory + if (dataframe := _find_clinical_data(clinical_data_directory)) is None: raise FileNotFoundError("Clinical data not found") # Compute participant and session IDs. @@ -31,8 +33,12 @@ def read_clinical_data(clinical_data_directory: Path) -> pd.DataFrame: index={"loni_id": "participant_id", "visit_number": "session_id"} ) dataframe.index = dataframe.index.map( - lambda x: (f"sub-NIFD{x[0].replace('_', '')}", f"ses-M{(6 * (x[1] - 1)):03d}") + lambda x: ( + bids_id_factory(StudyName.NIFD).from_original_study_id(x[0]), + f"ses-M{(6 * (x[1] - 1)):03d}", + ) ) + # Keep relevant columns and rename them. dataframe = ( dataframe[["dx", "site", "education", "race", "cdr_box_score", "mmse_tot"]] @@ -167,6 +173,8 @@ def dataset_to_bids( imaging_data: pd.DataFrame, clinical_data: Optional[pd.DataFrame] = None, ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + from clinica.iotools.bids_utils import StudyName, bids_id_factory + # Parse preprocessing information from scan descriptions. 
preprocessing = imaging_data.description.apply(_parse_preprocessing).apply( pd.Series @@ -201,7 +209,7 @@ def dataset_to_bids( # Compute the BIDS-compliant participant, session and scan IDs. scans = scans.assign( participant_id=lambda df: df.subject.apply( - lambda x: f"sub-NIFD{x.replace('_', '')}" + lambda x: bids_id_factory(StudyName.NIFD).from_original_study_id(x) ), session_id=lambda df: df.visit.apply( lambda x: ( diff --git a/clinica/iotools/converters/oasis3_to_bids/oasis3_utils.py b/clinica/iotools/converters/oasis3_to_bids/oasis3_utils.py index fc6f838ca..1340eefcc 100644 --- a/clinica/iotools/converters/oasis3_to_bids/oasis3_utils.py +++ b/clinica/iotools/converters/oasis3_to_bids/oasis3_utils.py @@ -48,6 +48,8 @@ def _get_df_based_on_index_name( def read_imaging_data(imaging_data_directory: Path) -> pd.DataFrame: + from clinica.iotools.bids_utils import StudyName, bids_id_factory + source_path_series = pd.Series( _find_imaging_data(imaging_data_directory), name="source_path" ) @@ -77,7 +79,11 @@ def read_imaging_data(imaging_data_directory: Path) -> pd.DataFrame: .drop_duplicates() .sort_values(by=["source_path"]) ) - df_source = df_source.assign(participant_id=lambda df: "sub-" + df.Subject) + df_source = df_source.assign( + participant_id=lambda df: df.Subject.apply( + lambda x: bids_id_factory(StudyName.OASIS3).from_original_study_id(x) + ) + ) df_source["modality"] = df_source[["modality", "modality_2"]].apply( "_".join, axis=1 ) diff --git a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py index 86d4765e2..f5d281562 100644 --- a/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py +++ b/clinica/iotools/converters/oasis_to_bids/oasis_to_bids.py @@ -176,12 +176,14 @@ def _create_modality_agnostic_files(self, bids_dir: Path): @staticmethod def convert_single_subject(subj_folder: Path, dest_dir: Path): + from clinica.iotools.bids_utils import StudyName, bids_id_factory from 
clinica.utils.stream import cprint t1_folder = subj_folder / "PROCESSED" / "MPRAGE" / "SUBJ_111" cprint(f"Converting {subj_folder.name}", lvl="info") - numerical_id = (subj_folder.name.split("_"))[1] - participant_id = f"sub-OASIS1{numerical_id}" + participant_id = bids_id_factory(StudyName.OASIS).from_original_study_id( + subj_folder.name + ) bids_subj_folder = dest_dir / participant_id if not bids_subj_folder.is_dir(): bids_subj_folder.mkdir(parents=True) diff --git a/clinica/iotools/converters/ukb_to_bids/ukb_utils.py b/clinica/iotools/converters/ukb_to_bids/ukb_utils.py index ca11f7809..a942dbcb0 100644 --- a/clinica/iotools/converters/ukb_to_bids/ukb_utils.py +++ b/clinica/iotools/converters/ukb_to_bids/ukb_utils.py @@ -145,9 +145,12 @@ def merge_imaging_and_clinical_data( def _complete_clinical(df_clinical: pd.DataFrame) -> pd.DataFrame: """This function uses the existing data to create the columns needed for the bids hierarchy (subject_id, ses, age_at _sessions, etc.)""" + from clinica.iotools.bids_utils import StudyName, bids_id_factory df_clinical = df_clinical.assign( - participant_id=lambda df: ("sub-UKB" + df.source_id.astype("str")) + participant_id=lambda df: df.source_id.astype("str").apply( + lambda x: bids_id_factory(StudyName.UKB).from_original_study_id(x) + ) ) df_clinical = df_clinical.assign( sessions=lambda df: "ses-" + df.source_sessions_number.astype("str") diff --git a/test/unittests/iotools/converters/adni_to_bids/test_adni_json.py b/test/unittests/iotools/converters/adni_to_bids/test_adni_json.py index 15a0778c0..4448830bc 100644 --- a/test/unittests/iotools/converters/adni_to_bids/test_adni_json.py +++ b/test/unittests/iotools/converters/adni_to_bids/test_adni_json.py @@ -15,16 +15,16 @@ [ ("sub-ADNI000S0000", "000_S_0000"), ("sub-ADNI123S4567", "123_S_4567"), - ("sub-ADNI12S4567", "12_S_4567"), - ("sub-ADNI123X4567", "123_S_4567"), - ("sub-ADNI123XYZ4567", "123_S_4567"), - ("sub-ADNI123XYZ_TT4567", "123_S_4567"), + ("sub-ADNI12S4567", 
@pytest.mark.parametrize(
    "study,study_id,expected",
    [
        (StudyName.ADNI, "001_S_0001", "sub-ADNI001S0001"),
        (StudyName.NIFD, "1_S_0001", "sub-NIFD1S0001"),
        (StudyName.AIBL, "10", "sub-AIBL10"),
        (StudyName.UKB, "0101001", "sub-UKB0101001"),
        (StudyName.GENFI, "MAPT009", "sub-MAPT009"),
        (StudyName.OASIS3, "OAS30001", "sub-OAS30001"),
        (StudyName.HABS, "P_INIBUB", "sub-HABSINIBUB"),
        (StudyName.OASIS, "OAS1_0001_MR1", "sub-OASIS10001"),
    ],
)
def test_study_to_bids_id_passing(study, study_id, expected):
    """A well-formed source ID is converted to the expected BIDS subject ID."""
    from clinica.iotools.bids_utils import bids_id_factory

    id_class = bids_id_factory(study)
    converted = id_class.from_original_study_id(study_id)

    assert converted == expected