From 95bcd3ce86898a2e34c1f7ff9798153416289b1c Mon Sep 17 00:00:00 2001 From: AliceJoubert <158147135+AliceJoubert@users.noreply.github.com> Date: Wed, 2 Oct 2024 14:42:34 +0200 Subject: [PATCH] [ENH] Allow the T1-Linear pipeline to go over (subject, session) couples that do not have data (#1285) * First pass on issue * Make it work * Add todos for more complex refactoring * Testing wip * Proposition 2 * Touch clinica_list_of_files_reader * Change to warning * Changes upon suggestions * add todos * Changes upon suggestions * WIP generalization * WIP 2 * Fix clinica_list_of_files_reader * Generalization/Replacement for clinica_file_reader * Add forgotten condition * Changes upon suggestions #1 --- .../anatomical/freesurfer/atlas/pipeline.py | 10 +- .../longitudinal/correction/pipeline.py | 38 ++-- .../longitudinal/template/pipeline.py | 22 +- .../anatomical/freesurfer/t1/pipeline.py | 15 +- .../dwi/preprocessing/fmap/pipeline.py | 2 +- .../dwi/preprocessing/t1/pipeline.py | 2 +- .../spatial_svm_pipeline.py | 25 +- clinica/pipelines/pet/linear/pipeline.py | 60 +++-- clinica/pipelines/pet/volume/pipeline.py | 107 ++++----- .../pet_surface/pet_surface_pipeline.py | 214 ++++++------------ .../pipelines/statistics_surface/pipeline.py | 29 +-- .../statistics_volume_pipeline.py | 18 +- .../t1_linear/anat_linear_pipeline.py | 25 +- .../t1_volume_create_dartel_pipeline.py | 42 ++-- .../t1_volume_dartel2mni_pipeline.py | 57 ++--- .../t1_volume_parcellation_pipeline.py | 27 +-- .../t1_volume_register_dartel_pipeline.py | 34 +-- .../t1_volume_tissue_segmentation_pipeline.py | 14 +- clinica/utils/inputs.py | 182 ++++++++------- test/unittests/utils/test_utils_inputs.py | 185 ++++++++------- 20 files changed, 498 insertions(+), 610 deletions(-) diff --git a/clinica/pipelines/anatomical/freesurfer/atlas/pipeline.py b/clinica/pipelines/anatomical/freesurfer/atlas/pipeline.py index ea172c5e8..18c236765 100644 --- a/clinica/pipelines/anatomical/freesurfer/atlas/pipeline.py +++ b/clinica/pipelines/anatomical/freesurfer/atlas/pipeline.py @@ -84,10 +84,16 @@ def get_to_process_with_atlases( - set(t1_freesurfer_longitudinal_output_atlas) ) t1_freesurfer_output, _ = clinica_file_reader( - subjects, sessions, caps_directory, T1_FS_DESTRIEUX, False + subjects, + sessions, + caps_directory, + T1_FS_DESTRIEUX, ) t1_freesurfer_files, _ = clinica_file_reader( - subjects, sessions, caps_directory, atlas_info, False + subjects, + sessions, + caps_directory, + atlas_info, ) image_ids = extract_image_ids(t1_freesurfer_files) image_ids_2 = extract_image_ids(t1_freesurfer_output) diff --git a/clinica/pipelines/anatomical/freesurfer/longitudinal/correction/pipeline.py b/clinica/pipelines/anatomical/freesurfer/longitudinal/correction/pipeline.py index 6d157e7cf..3fa7114da 100644 --- a/clinica/pipelines/anatomical/freesurfer/longitudinal/correction/pipeline.py +++ b/clinica/pipelines/anatomical/freesurfer/longitudinal/correction/pipeline.py @@ -60,7 +60,10 @@ def _build_input_node(self): ) from clinica.utils.exceptions import ClinicaException from clinica.utils.input_files import T1_FS_DESTRIEUX, T1_FS_T_DESTRIEUX - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import ( + clinica_file_reader, + format_clinica_file_reader_errors, + ) from clinica.utils.stream import cprint from .utils import ( @@ -117,26 +120,19 @@ def _build_input_node(self): to_process_ids ) - all_errors = [] - try: - # Check that t1-freesurfer has run on the CAPS directory - clinica_file_reader( - self.subjects, self.sessions, self.caps_directory, T1_FS_DESTRIEUX - ) - except ClinicaException as e: - all_errors.append(e) - try: - # Check that t1-freesurfer-template has run on the CAPS directory - clinica_file_reader( - self.subjects, list_long_id, self.caps_directory, T1_FS_T_DESTRIEUX - ) - except ClinicaException as e: - all_errors.append(e) - if len(all_errors) > 0: - error_message = "Clinica faced errors while trying to read files in your CAPS directory.\n" - for msg in all_errors: - error_message += str(msg) - raise ClinicaException(error_message) + _, errors_destrieux = clinica_file_reader( + self.subjects, self.sessions, self.caps_directory, T1_FS_DESTRIEUX + ) + _, errors_t_destrieux = clinica_file_reader( + self.subjects, list_long_id, self.caps_directory, T1_FS_T_DESTRIEUX + ) + all_errors = [errors_destrieux, errors_t_destrieux] + + if any(all_errors): + message = "Clinica faced errors while trying to read files in your CAPS directory.\n" + for error, info in zip(all_errors, [T1_FS_DESTRIEUX, T1_FS_T_DESTRIEUX]): + message += format_clinica_file_reader_errors(error, info) + raise ClinicaException(message) save_part_sess_long_ids_to_tsv( self.subjects, self.sessions, list_long_id, self.base_dir / self.name diff --git a/clinica/pipelines/anatomical/freesurfer/longitudinal/template/pipeline.py b/clinica/pipelines/anatomical/freesurfer/longitudinal/template/pipeline.py index e63664cb4..a1d5b34e8 100644 --- a/clinica/pipelines/anatomical/freesurfer/longitudinal/template/pipeline.py +++ b/clinica/pipelines/anatomical/freesurfer/longitudinal/template/pipeline.py @@ -32,11 +32,7 @@ def get_processed_images( image_ids: List[str] = [] if caps_directory.is_dir(): t1_freesurfer_files, _ = clinica_file_reader( - list_participant_id, - list_long_id, - caps_directory, - T1_FS_T_DESTRIEUX, - False, + list_participant_id, list_long_id, caps_directory, T1_FS_T_DESTRIEUX ) image_ids = [ re.search(r"(sub-[a-zA-Z0-9]+)_(long-[a-zA-Z0-9]+)", file).group() @@ -95,7 +91,7 @@ def _build_input_node(self): from clinica.utils.exceptions import ClinicaCAPSError, ClinicaException from clinica.utils.filemanip import extract_subjects_sessions_from_filename from clinica.utils.input_files import T1_FS_DESTRIEUX - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import clinica_file_filter from clinica.utils.longitudinal import ( get_long_id, get_participants_long_id, @@ -153,16 +149,10 @@ def _build_input_node(self): self.subjects, self.sessions = extract_subjects_sessions_from_filename( to_process_ids ) - try: - clinica_file_reader( - self.subjects, self.sessions, self.caps_directory, T1_FS_DESTRIEUX - ) - except ClinicaException as e: - err_msg = ( - "Clinica faced error(s) while trying to read files in your CAPS directory.\n" - + str(e) - ) - raise ClinicaCAPSError(err_msg) + + _, self.subjects, self.sessions = clinica_file_filter( + self.subjects, self.sessions, self.caps_directory, T1_FS_DESTRIEUX + ) long_ids = get_participants_long_id(self.subjects, self.sessions) save_part_sess_long_ids_to_tsv( diff --git a/clinica/pipelines/anatomical/freesurfer/t1/pipeline.py b/clinica/pipelines/anatomical/freesurfer/t1/pipeline.py index a9f08aebb..f69d529d1 100644 --- a/clinica/pipelines/anatomical/freesurfer/t1/pipeline.py +++ b/clinica/pipelines/anatomical/freesurfer/t1/pipeline.py @@ -27,7 +27,7 @@ def get_processed_images( image_ids: List[str] = [] if caps_directory.is_dir(): t1_freesurfer_files, _ = clinica_file_reader( - subjects, sessions, caps_directory, T1_FS_DESTRIEUX, False + subjects, sessions, caps_directory, T1_FS_DESTRIEUX ) image_ids = extract_image_ids(t1_freesurfer_files) return image_ids @@ -97,7 +97,7 @@ def _build_input_node(self): save_participants_sessions, ) from clinica.utils.input_files import T1W_NII - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import clinica_file_filter from clinica.utils.stream import cprint from clinica.utils.ux import print_images_to_process @@ -130,17 +130,10 @@ def _build_input_node(self): to_process_ids ) - t1w_files, error_message = clinica_file_reader( - self.subjects, - self.sessions, - self.bids_directory, - T1W_NII, - raise_exception=False, + t1w_files, self.subjects, self.sessions = clinica_file_filter( + self.subjects, self.sessions, self.bids_directory, T1W_NII ) - if error_message: - cprint(error_message, lvl="warning") - if not t1w_files: raise ClinicaException("Empty dataset or already processed") diff --git a/clinica/pipelines/dwi/preprocessing/fmap/pipeline.py b/clinica/pipelines/dwi/preprocessing/fmap/pipeline.py index adbf9bfae..b40fb2d52 100644 --- a/clinica/pipelines/dwi/preprocessing/fmap/pipeline.py +++ b/clinica/pipelines/dwi/preprocessing/fmap/pipeline.py @@ -36,7 +36,7 @@ def get_processed_images( image_ids: List[str] = [] if caps_directory.is_dir(): preproc_files, _ = clinica_file_reader( - subjects, sessions, caps_directory, DWI_PREPROC_NII, False + subjects, sessions, caps_directory, DWI_PREPROC_NII ) image_ids = extract_image_ids(preproc_files) return image_ids diff --git a/clinica/pipelines/dwi/preprocessing/t1/pipeline.py b/clinica/pipelines/dwi/preprocessing/t1/pipeline.py index b1e632f7d..0882cf714 100644 --- a/clinica/pipelines/dwi/preprocessing/t1/pipeline.py +++ b/clinica/pipelines/dwi/preprocessing/t1/pipeline.py @@ -40,7 +40,7 @@ def get_processed_images( image_ids: List[str] = [] if caps_directory.is_dir(): preproc_files, _ = clinica_file_reader( - subjects, sessions, caps_directory, DWI_PREPROC_NII, False + subjects, sessions, caps_directory, DWI_PREPROC_NII ) image_ids = extract_image_ids(preproc_files) return image_ids diff --git a/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py b/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py index bf8a8dd18..546dd3c2f 100644 --- a/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py +++ b/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py @@ -63,7 +63,11 @@ def _build_input_node(self): pet_volume_normalized_suvr_pet, t1_volume_final_group_template, ) - from clinica.utils.inputs import clinica_file_reader, clinica_group_reader + from clinica.utils.inputs import ( + clinica_file_reader, + clinica_group_reader, + format_clinica_file_reader_errors, + ) from clinica.utils.ux import print_groups_in_caps_directory if not ( @@ -118,15 +122,16 @@ def _build_input_node(self): f"Image type {self.parameters['orig_input_data_ml']} unknown." ) - try: - input_image, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - caps_files_information, + input_image, caps_error = clinica_file_reader( + self.subjects, + self.sessions, + self.caps_directory, + caps_files_information, + ) + if caps_error: + all_errors.append( + format_clinica_file_reader_errors(caps_error, caps_files_information) ) - except ClinicaException as e: - all_errors.append(e) try: dartel_input = clinica_group_reader( @@ -137,7 +142,7 @@ def _build_input_node(self): all_errors.append(e) # Raise all errors if some happened - if len(all_errors) > 0: + if any(all_errors): error_message = "Clinica faced errors while trying to read files in your CAPS directories.\n" for msg in all_errors: error_message += str(msg) diff --git a/clinica/pipelines/pet/linear/pipeline.py b/clinica/pipelines/pet/linear/pipeline.py index 9e877c5a8..ea9decc38 100644 --- a/clinica/pipelines/pet/linear/pipeline.py +++ b/clinica/pipelines/pet/linear/pipeline.py @@ -67,7 +67,10 @@ def _build_input_node(self): ) from clinica.utils.image import get_mni_template from clinica.utils.input_files import T1W_NII, T1W_TO_MNI_TRANSFORM - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import ( + clinica_file_reader, + format_clinica_file_reader_errors, + ) from clinica.utils.stream import cprint from clinica.utils.ux import print_images_to_process @@ -75,44 +78,39 @@ def _build_input_node(self): self.ref_mask = get_suvr_mask(self.parameters["suvr_reference_region"]) # Inputs from BIDS directory - try: - pet_files, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.bids_directory, - self._get_pet_scans_query(), - ) - except ClinicaException as e: - err = ( - "Clinica faced error(s) while trying to read pet files in your BIDS directory.\n" - + str(e) + pet_files, pet_errors = clinica_file_reader( + self.subjects, + self.sessions, + self.bids_directory, + self._get_pet_scans_query(), + ) + if pet_errors: + raise ClinicaBIDSError( + format_clinica_file_reader_errors( + pet_errors, self._get_pet_scans_query() + ) ) - raise ClinicaBIDSError(err) # T1w file: - try: - t1w_files, _ = clinica_file_reader( - self.subjects, self.sessions, self.bids_directory, T1W_NII - ) - except ClinicaException as e: - err = ( - "Clinica faced error(s) while trying to read t1w files in your BIDS directory.\n" - + str(e) + t1w_files, t1w_errors = clinica_file_reader( + self.subjects, self.sessions, self.bids_directory, T1W_NII + ) + if t1w_errors: + raise ClinicaBIDSError( + format_clinica_file_reader_errors(t1w_errors, T1W_NII) ) - raise ClinicaBIDSError(err) # Inputs from t1-linear pipeline # Transformation files from T1w files to MNI: - try: - t1w_to_mni_transformation_files, _ = clinica_file_reader( - self.subjects, self.sessions, self.caps_directory, T1W_TO_MNI_TRANSFORM - ) - except ClinicaException as e: - err = ( - "Clinica faced error(s) while trying to read transformation files in your CAPS directory.\n" - + str(e) + t1w_to_mni_transformation_files, t1w_to_mni_errors = clinica_file_reader( + self.subjects, self.sessions, self.caps_directory, T1W_TO_MNI_TRANSFORM + ) + if t1w_to_mni_errors: + raise ClinicaCAPSError( + format_clinica_file_reader_errors( + t1w_to_mni_errors, T1W_TO_MNI_TRANSFORM + ) ) - raise ClinicaCAPSError(err) if len(self.subjects): print_images_to_process(self.subjects, self.sessions) diff --git a/clinica/pipelines/pet/volume/pipeline.py b/clinica/pipelines/pet/volume/pipeline.py index f22360b16..76e7d7bbe 100644 --- a/clinica/pipelines/pet/volume/pipeline.py +++ b/clinica/pipelines/pet/volume/pipeline.py @@ -97,7 +97,12 @@ def _build_input_node(self): t1_volume_native_tpm, t1_volume_native_tpm_in_mni, ) - from clinica.utils.inputs import clinica_file_reader, clinica_group_reader + from clinica.utils.inputs import ( + clinica_file_reader, + clinica_group_reader, + clinica_list_of_files_reader, + format_clinica_file_reader_errors, + ) from clinica.utils.stream import cprint from clinica.utils.ux import ( print_groups_in_caps_directory, @@ -124,55 +129,56 @@ def _build_input_node(self): ) # PET from BIDS directory + # Native T1w-MRI + try: - pet_bids, _ = clinica_file_reader( + pet_bids, t1w_bids = clinica_list_of_files_reader( self.subjects, self.sessions, self.bids_directory, - self._get_pet_scans_query(), + [ + self._get_pet_scans_query(), + T1W_NII, + ], ) except ClinicaException as e: - all_errors.append(e) - - # Native T1w-MRI - try: - t1w_bids, _ = clinica_file_reader( - self.subjects, self.sessions, self.bids_directory, T1W_NII - ) - except ClinicaException as e: - all_errors.append(e) + all_errors += e # mask_tissues - tissues_input = [] - for tissue_number in self.parameters["mask_tissues"]: - try: - current_file, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - t1_volume_native_tpm_in_mni(tissue_number, False), - ) - tissues_input.append(current_file) - except ClinicaException as e: - all_errors.append(e) - # Tissues_input has a length of len(self.parameters['mask_tissues']). Each of these elements has a size of - # len(self.subjects). We want the opposite: a list of size len(self.subjects) whose elements have a size of - # len(self.parameters['mask_tissues']. The trick is to iter on elements with zip(*my_list) - tissues_input_final = [] - for subject_tissue_list in zip(*tissues_input): - tissues_input_final.append(subject_tissue_list) - tissues_input = tissues_input_final - - # Flowfields try: - flowfields_caps, _ = clinica_file_reader( + tissues_input = clinica_list_of_files_reader( self.subjects, self.sessions, self.caps_directory, - t1_volume_deformation_to_template(self.parameters["group_label"]), + [ + t1_volume_native_tpm_in_mni(tissue_number, False) + for tissue_number in self.parameters["mask_tissues"] + ], ) + # Tissues_input has a length of len(self.parameters['mask_tissues']). Each of these elements has a size of + # len(self.subjects). We want the opposite: a list of size len(self.subjects) whose elements have a size of + # len(self.parameters['mask_tissues']. The trick is to iter on elements with zip(*my_list) + tissues_input_final = [] + for subject_tissue_list in zip(*tissues_input): + tissues_input_final.append(subject_tissue_list) + tissues_input = tissues_input_final except ClinicaException as e: - all_errors.append(e) + all_errors += e + + # Flowfields + flowfields_caps, flowfields_errors = clinica_file_reader( + self.subjects, + self.sessions, + self.caps_directory, + t1_volume_deformation_to_template(self.parameters["group_label"]), + ) + if flowfields_errors: + all_errors.append( + format_clinica_file_reader_errors( + flowfields_errors, + t1_volume_deformation_to_template(self.parameters["group_label"]), + ) + ) # Dartel Template try: @@ -197,28 +203,27 @@ def _build_input_node(self): if self.parameters["apply_pvc"]: # pvc tissues input - pvc_tissues_input = [] - for tissue_number in self.parameters["pvc_mask_tissues"]: - try: - current_file, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - t1_volume_native_tpm(tissue_number), - ) - pvc_tissues_input.append(current_file) - except ClinicaException as e: - all_errors.append(e) - - if len(all_errors) == 0: + try: + pvc_tissues_input = clinica_list_of_files_reader( + self.subjects, + self.sessions, + self.caps_directory, + [ + t1_volume_native_tpm(tissue_number) + for tissue_number in self.parameters["pvc_mask_tissues"] + ], + ) pvc_tissues_input_final = [] for subject_tissue_list in zip(*pvc_tissues_input): pvc_tissues_input_final.append(subject_tissue_list) pvc_tissues_input = pvc_tissues_input_final + + except ClinicaException as e: + all_errors.append(e) else: pvc_tissues_input = [] - if len(all_errors) > 0: + if any(all_errors): error_message = "Clinica faced error(s) while trying to read files in your CAPS/BIDS directories.\n" for msg in all_errors: error_message += str(msg) diff --git a/clinica/pipelines/pet_surface/pet_surface_pipeline.py b/clinica/pipelines/pet_surface/pet_surface_pipeline.py index 71e9292b0..54046a037 100644 --- a/clinica/pipelines/pet_surface/pet_surface_pipeline.py +++ b/clinica/pipelines/pet_surface/pet_surface_pipeline.py @@ -77,7 +77,11 @@ def _build_input_node_longitudinal(self): check_relative_volume_location_in_world_coordinate_system, ) from clinica.utils.exceptions import ClinicaException - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import ( + clinica_file_reader, + clinica_list_of_files_reader, + format_clinica_file_reader_errors, + ) read_parameters_node = npe.Node( name="LoadingCLIArguments", @@ -88,93 +92,44 @@ def _build_input_node_longitudinal(self): ) all_errors = [] - try: - read_parameters_node.inputs.pet, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.bids_directory, - self._get_pet_scans_query(), - ) - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.orig_nu, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_LONG_ORIG_NU, - ) - - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.white_surface_right, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_LONG_SURF_R, - ) - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.white_surface_left, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_LONG_SURF_L, - ) - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.destrieux_left, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_LONG_DESTRIEUX_PARC_L, - ) - - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.destrieux_right, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_LONG_DESTRIEUX_PARC_R, - ) - - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.desikan_left, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_LONG_DESIKAN_PARC_L, - ) - - except ClinicaException as e: - all_errors.append(e) + read_parameters_node.inputs.pet, pet_errors = clinica_file_reader( + self.subjects, + self.sessions, + self.bids_directory, + self._get_pet_scans_query(), + ) + if pet_errors: + all_errors.append(format_clinica_file_reader_errors(pet_errors)) try: - read_parameters_node.inputs.desikan_right, _ = clinica_file_reader( + ( + read_parameters_node.inputs.orig_nu, + read_parameters_node.inputs.white_surface_right, + read_parameters_node.inputs.white_surface_left, + read_parameters_node.inputs.destrieux_left, + read_parameters_node.inputs.destrieux_right, + read_parameters_node.inputs.desikan_left, + read_parameters_node.inputs.desikan_right, + ) = clinica_list_of_files_reader( self.subjects, self.sessions, self.caps_directory, - input_files.T1_FS_LONG_DESIKAN_PARC_R, + [ + input_files.T1_FS_LONG_ORIG_NU, + input_files.T1_FS_LONG_SURF_R, + input_files.T1_FS_LONG_SURF_L, + input_files.T1_FS_LONG_DESTRIEUX_PARC_L, + input_files.T1_FS_LONG_DESTRIEUX_PARC_R, + input_files.T1_FS_LONG_DESIKAN_PARC_L, + input_files.T1_FS_LONG_DESIKAN_PARC_R, + ], ) except ClinicaException as e: all_errors.append(e) - if len(all_errors) > 0: + if any(all_errors): error_message = "Clinica faced errors while trying to read files in your BIDS or CAPS directories.\n" for msg in all_errors: error_message += str(msg) @@ -214,7 +169,11 @@ def _build_input_node_cross_sectional(self): check_relative_volume_location_in_world_coordinate_system, ) from clinica.utils.exceptions import ClinicaException - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import ( + clinica_file_reader, + clinica_list_of_files_reader, + format_clinica_file_reader_errors, + ) read_parameters_node = npe.Node( name="LoadingCLIArguments", @@ -225,87 +184,42 @@ def _build_input_node_cross_sectional(self): ) all_errors = [] - try: - read_parameters_node.inputs.pet, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.bids_directory, - self._get_pet_scans_query(), - ) - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.orig_nu, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_ORIG_NU, - ) - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.white_surface_right, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_WM_SURF_R, - ) - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.white_surface_left, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_WM_SURF_L, - ) - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.destrieux_left, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_DESTRIEUX_PARC_L, - ) - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.destrieux_right, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_DESTRIEUX_PARC_R, - ) - except ClinicaException as e: - all_errors.append(e) - - try: - read_parameters_node.inputs.desikan_left, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - input_files.T1_FS_DESIKAN_PARC_L, - ) - except ClinicaException as e: - all_errors.append(e) + read_parameters_node.inputs.pet, pet_errors = clinica_file_reader( + self.subjects, + self.sessions, + self.bids_directory, + self._get_pet_scans_query(), + ) + if pet_errors: + all_errors.append(format_clinica_file_reader_errors(pet_errors)) try: - read_parameters_node.inputs.desikan_right, _ = clinica_file_reader( + ( + read_parameters_node.inputs.orig_nu, + read_parameters_node.inputs.white_surface_right, + read_parameters_node.inputs.white_surface_left, + read_parameters_node.inputs.destrieux_left, + read_parameters_node.inputs.destrieux_right, + read_parameters_node.inputs.desikan_left, + read_parameters_node.inputs.desikan_right, + ) = clinica_list_of_files_reader( self.subjects, self.sessions, self.caps_directory, - input_files.T1_FS_DESIKAN_PARC_R, + [ + input_files.T1_FS_ORIG_NU, + input_files.T1_FS_WM_SURF_R, + input_files.T1_FS_WM_SURF_L, + input_files.T1_FS_DESTRIEUX_PARC_L, + input_files.T1_FS_DESTRIEUX_PARC_R, + input_files.T1_FS_DESIKAN_PARC_L, + input_files.T1_FS_DESIKAN_PARC_R, + ], ) except ClinicaException as e: all_errors.append(e) - if len(all_errors) > 0: + if any(all_errors): error_message = "Clinica faced errors while trying to read files in your BIDS or CAPS directories.\n" for msg in all_errors: error_message += str(msg) diff --git a/clinica/pipelines/statistics_surface/pipeline.py b/clinica/pipelines/statistics_surface/pipeline.py index 6ec1d09da..7939898ec 100644 --- a/clinica/pipelines/statistics_surface/pipeline.py +++ b/clinica/pipelines/statistics_surface/pipeline.py @@ -114,7 +114,7 @@ def get_output_fields(self) -> List[str]: def _build_input_node(self): """Build and connect an input node to the pipeline.""" from clinica.utils.exceptions import ClinicaException - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import clinica_list_of_files_reader # Check if already present in CAPS # ================================ @@ -133,7 +133,7 @@ def _build_input_node(self): ) # Check input files - all_errors = [] + surface_query = [] # clinica_files_reader expects regexp to start at subjects/ so sub-*/ses-*/ is removed here fwhm = str(self.parameters["full_width_at_half_maximum"]) for direction, hemi in zip(["left", "right"], ["lh", "rh"]): @@ -146,21 +146,16 @@ def _build_input_node(self): ], "description": f"surface-based features on {direction} hemisphere at FWHM = {fwhm}", } - try: - clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - surface_based_info, - ) - except ClinicaException as e: - all_errors.append(e) - - if len(all_errors) > 0: - error_message = "Clinica faced errors while trying to read files in your CAPS directory.\n" - for msg in all_errors: - error_message += str(msg) - raise RuntimeError(error_message) + surface_query.append(surface_based_info) + try: + clinica_list_of_files_reader( + self.subjects, + self.sessions, + self.caps_directory, + surface_query, + ) + except ClinicaException as e: + raise RuntimeError(e) def _build_output_node(self): """Build and connect an output node to the pipeline.""" diff --git a/clinica/pipelines/statistics_volume/statistics_volume_pipeline.py b/clinica/pipelines/statistics_volume/statistics_volume_pipeline.py index b5d9976ba..e94d1cdc8 100644 --- a/clinica/pipelines/statistics_volume/statistics_volume_pipeline.py +++ b/clinica/pipelines/statistics_volume/statistics_volume_pipeline.py @@ -100,11 +100,10 @@ def _build_input_node(self): pet_volume_normalized_suvr_pet, t1_volume_template_tpm_in_mni, ) - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import clinica_file_filter from clinica.utils.stream import cprint from clinica.utils.ux import print_begin_image, print_images_to_process - all_errors = [] if self.parameters["orig_input_data_volume"] == "pet-volume": if not ( self.parameters["acq_label"] @@ -151,18 +150,9 @@ def _build_input_node(self): f"Input data {self.parameters['orig_input_data_volume']} unknown." ) - try: - input_files, _ = clinica_file_reader( - self.subjects, self.sessions, self.caps_directory, information_dict - ) - except ClinicaException as e: - all_errors.append(e) - - if len(all_errors) > 0: - error_message = "Clinica faced errors while trying to read files in your CAPS directories.\n" - for msg in all_errors: - error_message += str(msg) - raise ClinicaException(error_message) + input_files, self.subjects, self.sessions = clinica_file_filter( + self.subjects, self.sessions, self.caps_directory, information_dict + ) read_parameters_node = npe.Node( name="LoadingCLIArguments", diff --git a/clinica/pipelines/t1_linear/anat_linear_pipeline.py b/clinica/pipelines/t1_linear/anat_linear_pipeline.py index a2c60469e..daa0c714d 100644 --- a/clinica/pipelines/t1_linear/anat_linear_pipeline.py +++ b/clinica/pipelines/t1_linear/anat_linear_pipeline.py @@ -78,7 +78,10 @@ def get_processed_images( image_ids: List[str] = [] if caps_directory.is_dir(): cropped_files, _ = clinica_file_reader( - subjects, sessions, caps_directory, T1W_LINEAR_CROPPED, False + subjects, + sessions, + caps_directory, + T1W_LINEAR_CROPPED, ) image_ids = extract_image_ids(cropped_files) return image_ids @@ -120,7 +123,7 @@ def _build_input_node(self): from clinica.utils.filemanip import extract_subjects_sessions_from_filename from clinica.utils.image import get_mni_template from clinica.utils.input_files import T1W_NII, Flair_T2W_NII - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import clinica_file_filter from clinica.utils.stream import cprint from clinica.utils.ux import print_images_to_process @@ -152,17 +155,13 @@ def _build_input_node(self): # Inputs from anat/ folder # ======================== # anat image file: - try: - file = T1W_NII if self.name == "t1-linear" else Flair_T2W_NII - anat_files, _ = clinica_file_reader( - self.subjects, self.sessions, self.bids_directory, file - ) - except ClinicaException as e: - err = ( - "Clinica faced error(s) while trying to read files in your BIDS directory.\n" - + str(e) - ) - raise ClinicaBIDSError(err) + query = T1W_NII if self.name == "t1-linear" else Flair_T2W_NII + + anat_files, filtered_subjects, filtered_sessions = clinica_file_filter( + self.subjects, self.sessions, self.bids_directory, query + ) + self.subjects = filtered_subjects + self.sessions = filtered_sessions if len(self.subjects): print_images_to_process(self.subjects, self.sessions) diff --git a/clinica/pipelines/t1_volume_create_dartel/t1_volume_create_dartel_pipeline.py b/clinica/pipelines/t1_volume_create_dartel/t1_volume_create_dartel_pipeline.py index b548df49f..f4efd2215 100644 --- a/clinica/pipelines/t1_volume_create_dartel/t1_volume_create_dartel_pipeline.py +++ b/clinica/pipelines/t1_volume_create_dartel/t1_volume_create_dartel_pipeline.py @@ -52,7 +52,7 @@ def _build_input_node(self): from clinica.utils.exceptions import ClinicaException from clinica.utils.input_files import t1_volume_dartel_input_tissue - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import clinica_list_of_files_reader from clinica.utils.stream import cprint from clinica.utils.ux import ( print_begin_image, @@ -91,30 +91,22 @@ def _build_input_node(self): fields=self.get_input_fields(), mandatory_inputs=True ), ) - all_errors = [] - d_input = [] - for tissue_number in self.parameters["dartel_tissues"]: - try: - current_file, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - t1_volume_dartel_input_tissue(tissue_number), - ) - d_input.append(current_file) - except ClinicaException as e: - all_errors.append(e) - - # Raise all errors if some happened - if len(all_errors) > 0: - error_message = "Clinica faced errors while trying to read files in your BIDS or CAPS directories.\n" - for msg in all_errors: - error_message += str(msg) - raise RuntimeError(error_message) - - # d_input is a list of size len(self.parameters['dartel_tissues']) - # Each element of this list is a list of size len(self.subjects) - read_parameters_node.inputs.dartel_inputs = d_input + + try: + d_input = clinica_list_of_files_reader( + self.subjects, + self.sessions, + self.caps_directory, + [ + t1_volume_dartel_input_tissue(tissue_number) + for tissue_number in self.parameters["dartel_tissues"] + ], + ) + # d_input is a list of size len(self.parameters['dartel_tissues']) + # Each element of this list is a list of size len(self.subjects) + read_parameters_node.inputs.dartel_inputs = d_input + except ClinicaException as e: + raise RuntimeError(e) if len(self.subjects): print_images_to_process(self.subjects, self.sessions) diff --git a/clinica/pipelines/t1_volume_dartel2mni/t1_volume_dartel2mni_pipeline.py b/clinica/pipelines/t1_volume_dartel2mni/t1_volume_dartel2mni_pipeline.py index a4fafe39a..f01b1ac16 100644 --- a/clinica/pipelines/t1_volume_dartel2mni/t1_volume_dartel2mni_pipeline.py +++ b/clinica/pipelines/t1_volume_dartel2mni/t1_volume_dartel2mni_pipeline.py @@ -57,7 +57,12 @@ def _build_input_node(self): t1_volume_final_group_template, t1_volume_native_tpm, ) - from clinica.utils.inputs import clinica_file_reader, clinica_group_reader + from clinica.utils.inputs import ( + clinica_file_reader, + clinica_group_reader, + clinica_list_of_files_reader, + format_clinica_file_reader_errors, + ) from clinica.utils.stream import cprint from clinica.utils.ux import ( print_groups_in_caps_directory, @@ -83,39 +88,37 @@ def _build_input_node(self): # Segmented Tissues # ================= - tissues_input = [] - for tissue_number in self.parameters["tissues"]: - try: - native_space_tpm, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - t1_volume_native_tpm(tissue_number), - ) - tissues_input.append(native_space_tpm) - except ClinicaException as e: - all_errors.append(e) - # Tissues_input has a length of len(self.parameters['mask_tissues']). Each of these elements has a size of - # len(self.subjects). We want the opposite : a list of size len(self.subjects) whose elements have a size of - # len(self.parameters['mask_tissues']. The trick is to iter on elements with zip(*my_list) - tissues_input_rearranged = [] - for subject_tissue_list in zip(*tissues_input): - tissues_input_rearranged.append(subject_tissue_list) - - read_input_node.inputs.native_segmentations = tissues_input_rearranged - - # Flow Fields - # =========== try: - read_input_node.inputs.flowfield_files, _ = clinica_file_reader( + tissues_input = clinica_list_of_files_reader( self.subjects, self.sessions, self.caps_directory, - t1_volume_deformation_to_template(self.parameters["group_label"]), + [ + t1_volume_native_tpm(tissue_number) + for tissue_number in self.parameters["tissues"] + ], ) + # Tissues_input has a length of len(self.parameters['mask_tissues']). Each of these elements has a size of + # len(self.subjects). We want the opposite : a list of size len(self.subjects) whose elements have a size of + # len(self.parameters['mask_tissues']. The trick is to iter on elements with zip(*my_list) + tissues_input_rearranged = [] + for subject_tissue_list in zip(*tissues_input): + tissues_input_rearranged.append(subject_tissue_list) + read_input_node.inputs.native_segmentations = tissues_input_rearranged except ClinicaException as e: all_errors.append(e) + # Flow Fields + # =========== + read_input_node.inputs.flowfield_files, flowfield_errors = clinica_file_reader( + self.subjects, + self.sessions, + self.caps_directory, + t1_volume_deformation_to_template(self.parameters["group_label"]), + ) + if flowfield_errors: + all_errors.append(format_clinica_file_reader_errors(flowfield_errors)) + # Dartel Template # ================ try: @@ -126,7 +129,7 @@ def _build_input_node(self): except ClinicaException as e: all_errors.append(e) - if len(all_errors) > 0: + if any(all_errors): error_message = "Clinica faced error(s) while trying to read files in your CAPS/BIDS directories.\n" for msg in all_errors: error_message += str(msg) diff --git a/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py b/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py index 0e7a118de..284df607b 100644 --- a/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py +++ b/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py @@ -51,7 +51,7 @@ def _build_input_node(self): from clinica.utils.exceptions import ClinicaCAPSError, ClinicaException from clinica.utils.input_files import t1_volume_template_tpm_in_mni - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import clinica_file_filter, clinica_file_reader from clinica.utils.stream import cprint from clinica.utils.ux import ( print_groups_in_caps_directory, @@ -67,21 +67,16 @@ def _build_input_node(self): "Did you run t1-volume or t1-volume-create-dartel pipeline?" ) - try: - gm_mni, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - t1_volume_template_tpm_in_mni( - group_label=self.parameters["group_label"], - tissue_number=1, - modulation=self.parameters["modulate"], - ), - ) - except ClinicaException as e: - final_error_str = "Clinica faced error(s) while trying to read files in your CAPS directory.\n" - final_error_str += str(e) - raise ClinicaCAPSError(final_error_str) + gm_mni, self.subjects, self.sessions = clinica_file_filter( + self.subjects, + self.sessions, + self.caps_directory, + t1_volume_template_tpm_in_mni( + group_label=self.parameters["group_label"], + tissue_number=1, + modulation=self.parameters["modulate"], + ), + ) read_parameters_node = npe.Node( name="LoadingCLIArguments", diff --git a/clinica/pipelines/t1_volume_register_dartel/t1_volume_register_dartel_pipeline.py b/clinica/pipelines/t1_volume_register_dartel/t1_volume_register_dartel_pipeline.py index 9b8759ac5..6e8c17687 100644 --- a/clinica/pipelines/t1_volume_register_dartel/t1_volume_register_dartel_pipeline.py +++ b/clinica/pipelines/t1_volume_register_dartel/t1_volume_register_dartel_pipeline.py @@ -53,7 +53,11 @@ def _build_input_node(self): t1_volume_dartel_input_tissue, t1_volume_i_th_iteration_group_template, ) - from clinica.utils.inputs import clinica_file_reader, clinica_group_reader + from clinica.utils.inputs import ( + clinica_file_reader, + clinica_group_reader, + clinica_list_of_files_reader, + ) from clinica.utils.ux import print_images_to_process read_input_node = npe.Node( @@ -67,18 +71,19 @@ def _build_input_node(self): # Dartel Input Tissues # ==================== - d_input = [] - for tissue_number in self.parameters["tissues"]: - try: - current_file, _ = clinica_file_reader( - self.subjects, - self.sessions, - self.caps_directory, - t1_volume_dartel_input_tissue(tissue_number), - ) - d_input.append(current_file) - except ClinicaException as e: - all_errors.append(e) + try: + d_input = clinica_list_of_files_reader( + self.subjects, + self.sessions, + self.caps_directory, + [ + t1_volume_dartel_input_tissue(tissue_number) + for tissue_number in self.parameters["tissues"] + ], + ) + read_input_node.inputs.dartel_input_images = d_input + except ClinicaException as e: + all_errors.append(e) # Dartel Templates # ================ @@ -96,13 +101,12 @@ def _build_input_node(self): except ClinicaException as e: all_errors.append(e) - if len(all_errors) > 0: + if any(all_errors): error_message = "Clinica faced error(s) while trying to read files in your CAPS/BIDS directories.\n" for msg in all_errors: error_message += str(msg) raise ClinicaCAPSError(error_message) - read_input_node.inputs.dartel_input_images = d_input read_input_node.inputs.dartel_iteration_templates = dartel_iter_templates if len(self.subjects): diff --git a/clinica/pipelines/t1_volume_tissue_segmentation/t1_volume_tissue_segmentation_pipeline.py b/clinica/pipelines/t1_volume_tissue_segmentation/t1_volume_tissue_segmentation_pipeline.py index 9485c18c6..46f9f1c1c 100644 --- a/clinica/pipelines/t1_volume_tissue_segmentation/t1_volume_tissue_segmentation_pipeline.py +++ b/clinica/pipelines/t1_volume_tissue_segmentation/t1_volume_tissue_segmentation_pipeline.py @@ -77,20 +77,18 @@ def _build_input_node(self): ) from clinica.utils.exceptions import ClinicaBIDSError, ClinicaException from clinica.utils.input_files import T1W_NII - from clinica.utils.inputs import clinica_file_reader + from clinica.utils.inputs import clinica_file_filter from clinica.utils.stream import cprint from clinica.utils.ux import print_images_to_process # Inputs from anat/ folder # ======================== # T1w file: - try: - t1w_files, _ = clinica_file_reader( - self.subjects, self.sessions, self.bids_directory, T1W_NII - ) - except ClinicaException as e: - err = f"Clinica faced error(s) while trying to read files in your BIDS directory.\n{str(e)}" - raise ClinicaBIDSError(err) + t1w_files, subjects, sessions = clinica_file_filter( + self.subjects, self.sessions, self.bids_directory, T1W_NII + ) + self.subjects = subjects + self.sessions = sessions check_volume_location_in_world_coordinate_system( t1w_files, diff --git a/clinica/utils/inputs.py b/clinica/utils/inputs.py index 319690eb5..47a75c0dd 100644 --- a/clinica/utils/inputs.py +++ b/clinica/utils/inputs.py @@ -6,7 +6,7 @@ from enum import Enum from functools import partial from pathlib import Path -from typing import Callable, Dict, List, Optional, Tuple, Union +from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union class DatasetType(str, Enum): @@ -17,6 +17,7 @@ class DatasetType(str, Enum): RemoteFileStructure = namedtuple("RemoteFileStructure", ["filename", "url", "checksum"]) +InvalidSubjectSession = namedtuple("InvalidSubjectSession", ["subject", "session"]) def insensitive_glob(pattern_glob: str, recursive: Optional[bool] = False) -> List[str]: @@ -270,18 +271,17 @@ def check_caps_folder(caps_directory: Union[str, os.PathLike]) -> None: raise ClinicaCAPSError(error_string) -def find_sub_ses_pattern_path( +def find_images_path( input_directory: os.PathLike, subject: str, session: str, - error_encountered: list, - results: list, + errors: List[InvalidSubjectSession], + valid_paths: List[str], is_bids: bool, pattern: str, ) -> None: - """Appends the output path corresponding to subject, session and pattern in results. - - If an error is encountered, its corresponding message is added to the list `error_encountered`. + """Appends the resulting path corresponding to subject, session and pattern in valid_paths. + If an error is encountered, its (subject,session) couple is added to the list `errors`. Parameters ---------- @@ -300,10 +300,10 @@ def find_sub_ses_pattern_path( session : str Name given to the folder of a session (ex: ses-M00). - error_encountered : List + errors : List List to which errors encountered in this function are added. - results : List + valid_paths : List List to which the output path corresponding to subject, session and pattern is added. @@ -343,17 +343,14 @@ def find_sub_ses_pattern_path( f"Clinica will proceed with the latest run available, that is \n\n-{selected}.", lvl="warning", ) - results.append(selected) + valid_paths.append(selected) else: - error_str = f"\t* ({subject} | {session}): More than 1 file found:\n" - for found_file in current_glob_found: - error_str += f"\t\t{found_file}\n" - error_encountered.append(error_str) + errors.append(InvalidSubjectSession(subject, session)) elif len(current_glob_found) == 0: - error_encountered.append(f"\t* ({subject} | {session}): No file found\n") + errors.append(InvalidSubjectSession(subject, session)) # Otherwise the file found is added to the result else: - results.append(current_glob_found[0]) + valid_paths.append(current_glob_found[0]) def _are_multiple_runs(files: List[str]) -> bool: @@ -569,29 +566,76 @@ def _check_information(information: Dict) -> None: ) -def _format_errors(errors: List, information: Dict) -> str: - error_message = ( +def clinica_file_filter( + subjects: List[str], + sessions: List[str], + input_directory: Path, + information: Dict, + n_procs: int = 1, +) -> Tuple[List[str], List[str], List[str]]: + from clinica.utils.stream import cprint + + files, errors = clinica_file_reader( + subjects, sessions, input_directory, information, n_procs + ) + cprint(format_clinica_file_reader_errors(errors, information), "warning") + filtered_subjects, filtered_sessions = _remove_sub_ses_from_list( + subjects, sessions, errors + ) + return files, filtered_subjects, filtered_sessions + + +def format_clinica_file_reader_errors( + errors: Iterable[InvalidSubjectSession], information: Dict +) -> str: + message = ( f"Clinica encountered {len(errors)} " f"problem(s) while getting {information['description']}:\n" ) if "needed_pipeline" in information and information["needed_pipeline"]: - error_message += ( + message += ( "Please note that the following clinica pipeline(s) must " f"have run to obtain these files: {information['needed_pipeline']}\n" ) - error_message += "\n".join(errors) - - return error_message + if errors: + message += "".join(f"\t* ({err.subject} | {err.session})\n" for err in errors) + message += ( + "Clinica could not identify which file to use (missing or too many) for these sessions. " + "They will not be processed." + ) + return message -def clinica_file_reader( +def _remove_sub_ses_from_list( subjects: List[str], sessions: List[str], + errors: Iterable[InvalidSubjectSession], +) -> Tuple[List[str], List[str]]: + subjects = subjects.copy() + sessions = sessions.copy() + for invalid in errors: + sub_indexes = [ + i for i, subject in enumerate(subjects) if subject == invalid.subject + ] + session_indexes = [ + i for i, session in enumerate(sessions) if session == invalid.session + ] + to_remove = list(set(sub_indexes) & set(session_indexes)) + to_remove.sort(reverse=True) + for index in to_remove: + subjects.pop(index) + sessions.pop(index) + return subjects, sessions + + +# todo : generalize +def clinica_file_reader( + subjects: Iterable[str], + sessions: Iterable[str], input_directory: os.PathLike, information: Dict, - raise_exception: Optional[bool] = True, - n_procs: Optional[int] = 1, -): + n_procs: int = 1, +) -> Tuple[List[str], List[InvalidSubjectSession]]: """Read files in BIDS or CAPS directory based on participant ID(s). This function grabs files relative to a subject and session list according to a glob pattern (using *) @@ -616,10 +660,6 @@ def clinica_file_reader( - `needed_pipeline` : Optional. String describing the pipeline(s) needed to obtain the related file. - raise_exception : bool, optional - If True, an exception is raised if errors happen. If not, we return the file - list as it is. Default=True. - n_procs : int, optional Number of cores used to fetch files in parallel. If set to 1, subjects and sessions will be processed sequentially. @@ -629,9 +669,8 @@ def clinica_file_reader( ------- results : List[str] List of files respecting the subject/session order provided in input. - - error_message : str - Error message which contains all errors encountered while reading the files. + Iterable[InvalidSubjectSession] + List of tuples (subject, session) which were identified as invalid (too many files or none). Raises ------ @@ -643,26 +682,11 @@ def clinica_file_reader( for more details. If the length of `subjects` is different from the length of `sessions`. - ClinicaCAPSError or ClinicaBIDSError - If multiples files are found for 1 subject/session, or if no file is found. - - .. note:: - If `raise_exception` is False, no exception is raised. - Notes ----- This function is case-insensitive, meaning that the pattern argument can, for example, contain upper case letters that do not exist in the existing file path. - You should always use `clinica_file_reader` in the following manner: - - .. code-block:: python - - try: - file_list = clinica_file_reader(...) - except ClinicaException as e: - # Deal with the error - Examples -------- The paths are shortened for readability. @@ -725,12 +749,11 @@ def clinica_file_reader( or even more precise: 't1/freesurfer_cross_sectional/sub-*_ses-*/surf/rh.white' It then gives: ['/caps/subjects/sub-ADNI011S4105/ses-M00/t1/freesurfer_cross_sectional/sub-ADNI011S4105_ses-M00/surf/rh.white'] """ - from clinica.utils.exceptions import ClinicaBIDSError, ClinicaCAPSError + input_directory = Path(input_directory) _check_information(information) pattern = information["pattern"] - input_directory = Path(input_directory) is_bids = determine_caps_or_bids(input_directory) if is_bids: check_bids_folder(input_directory) @@ -741,10 +764,10 @@ def clinica_file_reader( raise ValueError("Subjects and sessions must have the same length.") if len(subjects) == 0: - return [], "" + return [], [] file_reader = _read_files_parallel if n_procs > 1 else _read_files_sequential - results, errors_encountered = file_reader( + return file_reader( input_directory, subjects, sessions, @@ -752,25 +775,16 @@ def clinica_file_reader( pattern, n_procs=n_procs, ) - error_message = _format_errors(errors_encountered, information) - - if len(errors_encountered) > 0 and raise_exception: - if is_bids: - raise ClinicaBIDSError(error_message) - else: - raise ClinicaCAPSError(error_message) - - return results, error_message def _read_files_parallel( input_directory: os.PathLike, - subjects: List[str], - sessions: List[str], + subjects: Iterable[str], + sessions: Iterable[str], is_bids: bool, pattern: str, n_procs: int, -) -> Tuple[List[str], List[str]]: +) -> Tuple[List[str], List[InvalidSubjectSession]]: from multiprocessing import Manager from joblib import Parallel, delayed @@ -779,7 +793,7 @@ def _read_files_parallel( shared_results = manager.list() shared_errors_encountered = manager.list() Parallel(n_jobs=n_procs)( - delayed(find_sub_ses_pattern_path)( + delayed(find_images_path)( input_directory, sub, ses, @@ -797,15 +811,15 @@ def _read_files_parallel( def _read_files_sequential( input_directory: os.PathLike, - subjects: List[str], - sessions: List[str], + subjects: Iterable[str], + sessions: Iterable[str], is_bids: bool, pattern: str, **kwargs, -) -> Tuple[List[str], List[str]]: +) -> Tuple[List[str], List[InvalidSubjectSession]]: errors_encountered, results = [], [] for sub, ses in zip(subjects, sessions): - find_sub_ses_pattern_path( + find_images_path( input_directory, sub, ses, errors_encountered, results, is_bids, pattern ) return results, errors_encountered @@ -847,29 +861,24 @@ def clinica_list_of_files_reader( list_found_files : List[List[str]] List of lists of found files following order of `list_information` """ - from .exceptions import ClinicaBIDSError, ClinicaException + from .exceptions import ClinicaBIDSError all_errors = [] list_found_files = [] for info_file in list_information: - try: - list_found_files.append( - clinica_file_reader( - participant_ids, - session_ids, - bids_or_caps_directory, - info_file, - True, - )[0] - ) - except ClinicaException as e: - list_found_files.append([]) - all_errors.append(e) + files, errors = clinica_file_reader( + participant_ids, + session_ids, + bids_or_caps_directory, + info_file, + ) + all_errors.append(errors) + list_found_files.append([] if errors else files) - if len(all_errors) > 0 and raise_exception: + if any(all_errors) and raise_exception: error_message = "Clinica faced error(s) while trying to read files in your BIDS or CAPS directory.\n" - for msg in all_errors: - error_message += str(msg) + for error, info in zip(all_errors, list_information): + error_message += format_clinica_file_reader_errors(error, info) raise ClinicaBIDSError(error_message) return list_found_files @@ -935,6 +944,7 @@ def _format_and_raise_group_reader_errors( found_files: List, information: Dict, ) -> None: + # todo : TEST from clinica.utils.exceptions import ClinicaCAPSError error_string = ( diff --git a/test/unittests/utils/test_utils_inputs.py b/test/unittests/utils/test_utils_inputs.py index 4659ebe02..265ca48da 100644 --- a/test/unittests/utils/test_utils_inputs.py +++ b/test/unittests/utils/test_utils_inputs.py @@ -6,7 +6,7 @@ import pytest from clinica.utils.exceptions import ClinicaBIDSError, ClinicaCAPSError -from clinica.utils.inputs import DatasetType +from clinica.utils.inputs import DatasetType, InvalidSubjectSession from clinica.utils.testing_utils import ( build_bids_directory, build_caps_directory, @@ -14,6 +14,41 @@ ) +@pytest.mark.parametrize( + "input_subjects, input_sessions, to_remove, expected_subjects, expected_sessions", + [ + ( + ["sub1", "sub1", "sub2"], + ["ses1", "ses2", "ses1"], + [ + InvalidSubjectSession("sub1", "ses1"), + InvalidSubjectSession("sub3", "ses1"), + ], + ["sub1", "sub2"], + ["ses2", "ses1"], + ), + (["sub1"], ["ses1"], [], ["sub1"], ["ses1"]), + ( + ["sub1", "sub2", "sub2"], + ["ses1", "ses1", "ses1"], + [InvalidSubjectSession("sub2", "ses1")], + ["sub1"], + ["ses1"], + ), + ], +) +def test_remove_sub_ses_from_list( + input_subjects, input_sessions, to_remove, expected_subjects, expected_sessions +): + from clinica.utils.inputs import _remove_sub_ses_from_list + + result_subjects, result_sessions = _remove_sub_ses_from_list( + input_subjects, input_sessions, to_remove + ) + assert result_subjects == expected_subjects and result_sessions == expected_sessions + assert input_subjects == input_subjects and input_sessions == input_sessions + + def test_get_parent_path(tmp_path): from clinica.utils.inputs import _get_parent_path @@ -376,25 +411,25 @@ def test_check_caps_folder(tmp_path): check_caps_folder(tmp_path) -def test_find_sub_ses_pattern_path_error_no_file(tmp_path): - """Test function `find_sub_ses_pattern_path`.""" - from clinica.utils.inputs import find_sub_ses_pattern_path +def test_find_images_path_error_no_file(tmp_path): + """Test function `find_images_path`.""" + from clinica.utils.inputs import find_images_path (tmp_path / "sub-01" / "ses-M00" / "anat").mkdir(parents=True) errors, results = [], [] - find_sub_ses_pattern_path( + find_images_path( tmp_path, "sub-01", "ses-M00", errors, results, True, "sub-*_ses-*_t1w.nii*" ) assert len(results) == 0 assert len(errors) == 1 - assert errors[0] == "\t* (sub-01 | ses-M00): No file found\n" + assert errors[0] == ("sub-01", "ses-M00") -def test_find_sub_ses_pattern_path_error_more_than_one_file(tmp_path): - """Test function `find_sub_ses_pattern_path`.""" - from clinica.utils.inputs import find_sub_ses_pattern_path +def test_find_images_path_error_more_than_one_file(tmp_path): + """Test function `find_images_path`.""" + from clinica.utils.inputs import find_images_path errors, results = [], [] (tmp_path / "sub-01" / "ses-M00" / "anat" / "sub-01_ses-M00_T1w.nii.gz").mkdir( @@ -404,25 +439,25 @@ def test_find_sub_ses_pattern_path_error_more_than_one_file(tmp_path): tmp_path / "sub-01" / "ses-M00" / "anat" / "sub-01_ses-M00_foo-bar_T1w.nii.gz" ).mkdir(parents=True) - find_sub_ses_pattern_path( + find_images_path( tmp_path, "sub-01", "ses-M00", errors, results, True, "sub-*_ses-*_t1w.nii*" ) assert len(results) == 0 assert len(errors) == 1 - assert "\t* (sub-01 | ses-M00): More than 1 file found:" in errors[0] + assert errors[0] == ("sub-01", "ses-M00") -def test_find_sub_ses_pattern_path(tmp_path): - """Test function `find_sub_ses_pattern_path`.""" - from clinica.utils.inputs import find_sub_ses_pattern_path +def test_find_images_path(tmp_path): + """Test function `find_images_path`.""" + from clinica.utils.inputs import find_images_path (tmp_path / "sub-01" / "ses-M00" / "anat" / "sub-01_ses-M00_T1w.nii.gz").mkdir( parents=True ) errors, results = [], [] - find_sub_ses_pattern_path( + find_images_path( tmp_path, "sub-01", "ses-M00", errors, results, True, "sub-*_ses-*_t1w.nii*" ) @@ -433,8 +468,8 @@ def test_find_sub_ses_pattern_path(tmp_path): ) -def test_find_sub_ses_pattern_path_multiple_runs(tmp_path): - from clinica.utils.inputs import find_sub_ses_pattern_path +def test_find_images_path_multiple_runs(tmp_path): + from clinica.utils.inputs import find_images_path errors, results = [], [] ( @@ -452,7 +487,7 @@ def test_find_sub_ses_pattern_path_multiple_runs(tmp_path): / "sub-01_ses-M06_run-02_foo-bar_T1w.nii.gz" ).mkdir(parents=True) - find_sub_ses_pattern_path( + find_images_path( tmp_path, "sub-01", "ses-M06", errors, results, True, "sub-*_ses-*_t1w.nii*" ) @@ -494,30 +529,36 @@ def test_check_information(): def test_format_errors(): """Test utility function `_format_errors`.""" - from clinica.utils.inputs import _format_errors + from clinica.utils.inputs import format_clinica_file_reader_errors information = {"description": "foo bar baz"} assert ( - _format_errors([], information) + format_clinica_file_reader_errors([], information) == "Clinica encountered 0 problem(s) while getting foo bar baz:\n" ) information["needed_pipeline"] = ["pipeline_1", "pipeline_3"] - assert _format_errors([], information) == ( + assert format_clinica_file_reader_errors([], information) == ( "Clinica encountered 0 problem(s) while getting foo bar baz:\n" "Please note that the following clinica pipeline(s) must have " "run to obtain these files: ['pipeline_1', 'pipeline_3']\n" ) - errors = ["error 1: foo", "error 2: bar", "error 3: baz"] - assert _format_errors(errors, information) == ( + errors = [ + InvalidSubjectSession("sub1", "ses1"), + InvalidSubjectSession("sub2", "ses1"), + InvalidSubjectSession("sub3", "ses1"), + ] + assert format_clinica_file_reader_errors(errors, information) == ( "Clinica encountered 3 problem(s) while getting foo bar baz:\n" "Please note that the following clinica pipeline(s) must have " "run to obtain these files: ['pipeline_1', 'pipeline_3']\n" - "error 1: foo\nerror 2: bar\nerror 3: baz" + "\t* (sub1 | ses1)\n\t* (sub2 | ses1)\n\t* (sub3 | ses1)\n" + "Clinica could not identify which file to use (missing or too many) for these sessions. They will not be processed." ) information.pop("needed_pipeline") - assert _format_errors(errors, information) == ( + assert format_clinica_file_reader_errors(errors, information) == ( "Clinica encountered 3 problem(s) while getting foo bar baz:\n" - "error 1: foo\nerror 2: bar\nerror 3: baz" + "\t* (sub1 | ses1)\n\t* (sub2 | ses1)\n\t* (sub3 | ses1)\n" + "Clinica could not identify which file to use (missing or too many) for these sessions. They will not be processed." ) @@ -549,31 +590,27 @@ def test_clinica_file_reader_bids_directory(tmp_path, data_type): ["ses-M00", "ses-M06"], tmp_path, information, - raise_exception=True, n_procs=1, ) - assert clinica_file_reader( - [], [], tmp_path, information, raise_exception=True, n_procs=1 - ) == ([], "") - results, error_msg = clinica_file_reader( - ["sub-01"], ["ses-M00"], tmp_path, information, raise_exception=True, n_procs=1 + assert clinica_file_reader([], [], tmp_path, information, n_procs=1) == ([], []) + results, errors = clinica_file_reader( + ["sub-01"], ["ses-M00"], tmp_path, information, n_procs=1 ) assert len(results) == 1 assert Path(results[0]).relative_to(tmp_path) == Path( f"sub-01/ses-M00/anat/sub-01_ses-M00_{data_type}.nii.gz" ) - assert error_msg == f"Clinica encountered 0 problem(s) while getting {desc}:\n" + assert not errors - results, error_msg = clinica_file_reader( + results, errors = clinica_file_reader( ["sub-01", "sub-02", "sub-02", "sub-06"], ["ses-M00", "ses-M00", "ses-M06", "ses-M00"], tmp_path, information, - raise_exception=True, n_procs=4, ) assert len(results) == 4 - assert error_msg == f"Clinica encountered 0 problem(s) while getting {desc}:\n" + assert not errors ( tmp_path @@ -582,26 +619,11 @@ def test_clinica_file_reader_bids_directory(tmp_path, data_type): / "anat" / f"sub-01_ses-M00_foo-bar_{data_type}.nii.gz" ).mkdir() - results, error_msg = clinica_file_reader( - ["sub-01"], ["ses-M00"], tmp_path, information, raise_exception=False, n_procs=1 + results, errors = clinica_file_reader( + ["sub-01"], ["ses-M00"], tmp_path, information, n_procs=1 ) assert len(results) == 0 - expected_msg = ( - f"Clinica encountered 1 problem(s) while getting {desc}:\n" - "\t* (sub-01 | ses-M00): More than 1 file found:\n\t\t" - ) - assert expected_msg in error_msg - with pytest.raises( - ClinicaBIDSError, - ): - clinica_file_reader( - ["sub-01"], - ["ses-M00"], - tmp_path, - information, - raise_exception=True, - n_procs=1, - ) + assert errors == [InvalidSubjectSession("sub-01", "ses-M00")] def test_clinica_file_reader_caps_directory(tmp_path): @@ -634,36 +656,26 @@ def test_clinica_file_reader_caps_directory(tmp_path): ["ses-M00", "ses-M06"], tmp_path, information, - raise_exception=True, n_procs=1, ) - assert clinica_file_reader( - [], [], tmp_path, information, raise_exception=True, n_procs=1 - ) == ([], "") + assert clinica_file_reader([], [], tmp_path, information, n_procs=1) == ([], []) - results, error_msg = clinica_file_reader( - ["sub-01"], ["ses-M00"], tmp_path, information, raise_exception=True, n_procs=1 + results, errors = clinica_file_reader( + ["sub-01"], ["ses-M00"], tmp_path, information, n_procs=1 ) assert len(results) == 1 - expected_error_msg = ( - "Clinica encountered 0 problem(s) while getting T1w image registered " - "in MNI152NLin2009cSym space using t1-linear pipeline:\n" - "Please note that the following clinica pipeline(s) must have run to " - "obtain these files: t1-linear\n" - ) - assert error_msg == expected_error_msg + assert not errors - results, error_msg = clinica_file_reader( + results, errors = clinica_file_reader( ["sub-01", "sub-02", "sub-02", "sub-06"], ["ses-M00", "ses-M00", "ses-M06", "ses-M00"], tmp_path, information, - raise_exception=True, n_procs=4, ) assert len(results) == 4 - assert error_msg == expected_error_msg + assert not errors ( tmp_path @@ -673,38 +685,21 @@ def test_clinica_file_reader_caps_directory(tmp_path): / "t1_linear" / "sub-01_ses-M00_foo-bar_T1w_space-MNI152NLin2009cSym_res-1x1x1_T1w.nii.gz" ).mkdir() - results, error_msg = clinica_file_reader( - ["sub-01"], ["ses-M00"], tmp_path, information, raise_exception=False, n_procs=1 + results, errors = clinica_file_reader( + ["sub-01"], ["ses-M00"], tmp_path, information, n_procs=1 ) assert len(results) == 0 - expected_msg = ( - "Clinica encountered 1 problem(s) while getting T1w image registered " - "in MNI152NLin2009cSym space using t1-linear pipeline:\n" - "Please note that the following clinica pipeline(s) must have run to " - "obtain these files: t1-linear\n" - "\t* (sub-01 | ses-M00): More than 1 file found:\n" - ) - assert expected_msg in error_msg - with pytest.raises(ClinicaCAPSError): - clinica_file_reader( - ["sub-01"], - ["ses-M00"], - tmp_path, - information, - raise_exception=True, - n_procs=1, - ) + assert errors == [InvalidSubjectSession("sub-01", "ses-M00")] def test_clinica_file_reader_dwi_dti_error(tmp_path): from clinica.utils.input_files import dwi_dti from clinica.utils.inputs import clinica_file_reader + # todo : should be tested by check_caps_folder instead ? query = dwi_dti("FA", space="T1w") with pytest.raises(ClinicaCAPSError): - clinica_file_reader( - ["sub-01"], ["ses-M000"], tmp_path, query, raise_exception=True - ) + clinica_file_reader(["sub-01"], ["ses-M000"], tmp_path, query) def test_clinica_file_reader_dwi_dti(tmp_path): @@ -730,15 +725,14 @@ def test_clinica_file_reader_dwi_dti(tmp_path): for measure in DTIBasedMeasure: (dti_folder / f"sub-01_ses-M000_space-T1w_{measure.value}.nii.gz").touch() query = dwi_dti("FA", space="T1w") - found_files, errors = clinica_file_reader( - ["sub-01"], ["ses-M000"], tmp_path, query, raise_exception=True - ) + found_files, _ = clinica_file_reader(["sub-01"], ["ses-M000"], tmp_path, query) assert found_files == [str(dti_folder / "sub-01_ses-M000_space-T1w_FA.nii.gz")] queries = [dwi_dti(measure) for measure in DTIBasedMeasure] found_files = clinica_list_of_files_reader( ["sub-01"], ["ses-M000"], tmp_path, queries, raise_exception=True ) + assert found_files == [ [str(x)] for x in ( @@ -805,6 +799,7 @@ def test_clinica_list_of_files_reader(tmp_path): information, raise_exception=False, ) + assert len(results) == 2 assert len(results[0]) == 3 assert len(results[1]) == 0