
Commit

Merge branch nnu-net:master into master
Kobalt93 committed Mar 1, 2024
2 parents 1f37d8f + 83dad35 commit f2d6ef7
Showing 45 changed files with 1,305 additions and 841 deletions.
2 changes: 1 addition & 1 deletion documentation/competitions/AutoPETII.md
@@ -46,7 +46,7 @@ Add the following to the 'configurations' dict in 'nnUNetPlans.json':
```json
"3d_fullres_resenc": {
"inherits_from": "3d_fullres",
"UNet_class_name": "ResidualEncoderUNet",
"network_arch_class_name": "ResidualEncoderUNet",
"n_conv_per_stage_encoder": [
1,
3,
30 changes: 26 additions & 4 deletions documentation/dataset_format.md
@@ -26,7 +26,8 @@ T2 MRI, …) and FILE_ENDING is the file extension used by your image format (.p
The dataset.json file connects channel names with the channel identifiers in the 'channel_names' key (see below for details).

Side note: Typically, each channel/modality needs to be stored in a separate file and is accessed with the XXXX channel identifier.
The exception is natural images (RGB; .png), where the three color channels can all be stored in one file (see the [road segmentation](../nnunetv2/dataset_conversion/Dataset120_RoadSegmentation.py) dataset as an example).
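
For illustration only (hypothetical case identifier and channel names), a two-channel MRI case would therefore be stored as:

    imagesTr/case_001_0000.nii.gz   (channel 0000, e.g. T1)
    imagesTr/case_001_0001.nii.gz   (channel 0001, e.g. T2)
    labelsTr/case_001.nii.gz        (segmentation, no channel identifier)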

**Segmentations** must share the same geometry with their corresponding images (same shape etc.). Segmentations are
integer maps with each value representing a semantic class. The background must be 0. If there is no background, then
@@ -57,14 +58,14 @@ of what the raw data was provided in! This is for performance reasons.


By default, the following file formats are supported:

- NaturalImage2DIO: .png, .bmp, .tif
- NibabelIO: .nii.gz, .nrrd, .mha
- NibabelIOWithReorient: .nii.gz, .nrrd, .mha. This reader will reorient images to RAS!
- SimpleITKIO: .nii.gz, .nrrd, .mha
- Tiff3DIO: .tif, .tiff. 3D tif images! Since TIF does not have a standardized way of storing spacing information,
nnU-Net expects each TIF file to be accompanied by an identically named .json file that contains three numbers
(no units, no comma. Just separated by whitespace), one for each dimension.

nnU-Net expects each TIF file to be accompanied by an identically named .json file that contains this information (see
[here](#datasetjson)).

The file extension lists are not exhaustive and depend on what the backend supports. For example, nibabel and SimpleITK
support more than the three given here. The file endings given here are just the ones we tested!
@@ -200,6 +201,27 @@ There is a utility with which you can generate the dataset.json automatically. Y
[here](../nnunetv2/dataset_conversion/generate_dataset_json.py).
See our examples in [dataset_conversion](../nnunetv2/dataset_conversion) for how to use it. And read its documentation!
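
As a hedged sketch of how such a call might look (dataset name, channel names and labels below are placeholders, not taken from this commit; `overwrite_image_reader_writer` is optional and only needed to force a specific reader):

```python
from batchgenerators.utilities.file_and_folder_operations import join
from nnunetv2.dataset_conversion.generate_dataset_json import generate_dataset_json
from nnunetv2.paths import nnUNet_raw

# placeholder dataset with two MRI channels and one foreground class
generate_dataset_json(
    join(nnUNet_raw, 'Dataset123_Foo'),        # folder containing imagesTr/labelsTr
    channel_names={0: 'T1', 1: 'T2'},          # channel identifier -> channel name
    labels={'background': 0, 'tumor': 1},      # class name -> integer label, background must be 0
    num_training_cases=100,
    file_ending='.nii.gz',
    dataset_name='Dataset123_Foo',
    overwrite_image_reader_writer='NibabelIOWithReorient'
)
```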

As described above, a json file that contains spacing information is required for TIFF files.
An example for a 3D TIFF stack with a spacing of 7.6 in x and y and 80 in z is:

```
{
"spacing": [7.6, 7.6, 80.0]
}
```

Within the dataset folder, this file (named `cell6.json` in this example) would be placed in the following folders:

nnUNet_raw/Dataset123_Foo/
├── dataset.json
├── imagesTr
│   ├── cell6.json
│   └── cell6_0000.tif
└── labelsTr
├── cell6.json
└── cell6.tif
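
A minimal sketch for writing these spacing files programmatically, assuming the folder layout above and using `save_json` from batchgenerators (as the conversion scripts in this commit do):

```python
from batchgenerators.utilities.file_and_folder_operations import save_json, join

base = 'nnUNet_raw/Dataset123_Foo'        # placeholder path, matches the tree above
spacing = {"spacing": [7.6, 7.6, 80.0]}   # same values as the example above
# the .json file must accompany the TIFF in both imagesTr and labelsTr
save_json(spacing, join(base, 'imagesTr', 'cell6.json'))
save_json(spacing, join(base, 'labelsTr', 'cell6.json'))
```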


## How to use nnU-Net v1 Tasks
If you are migrating from the old nnU-Net, convert your existing datasets with `nnUNetv2_convert_old_nnUNet_dataset`!

2 changes: 1 addition & 1 deletion documentation/explanation_plans_files.md
@@ -74,7 +74,7 @@ nnunetv2.preprocessing.resampling
resampling function must be callable(data, current_spacing, new_spacing, **kwargs). It must be located in
nnunetv2.preprocessing.resampling
- `resampling_fn_seg_kwargs`: kwargs for resampling_fn_seg
- `UNet_class_name`: UNet class name, can be used to integrate custom dynamic architectures
- `network_arch_class_name`: UNet class name, can be used to integrate custom dynamic architectures
- `UNet_base_num_features`: The number of starting features for the UNet architecture. Default is 32. Features
are doubled with each downsampling
- `unet_max_num_features`: Maximum number of features (default: capped at 320 for 3D and 512 for 2d). The purpose is to
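
For illustration, a hedged sketch of how the `network_arch_class_name` key described above could be set by adding a custom configuration to an existing plans file (paths and configuration content are placeholders, mirroring the AutoPETII example earlier in this commit):

```python
from batchgenerators.utilities.file_and_folder_operations import load_json, save_json, join
from nnunetv2.paths import nnUNet_preprocessed

plans_file = join(nnUNet_preprocessed, 'Dataset123_Foo', 'nnUNetPlans.json')  # placeholder dataset
plans = load_json(plans_file)
plans['configurations']['3d_fullres_resenc'] = {
    'inherits_from': '3d_fullres',
    'network_arch_class_name': 'ResidualEncoderUNet',
}
save_json(plans, plans_file, sort_keys=False)
```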
10 changes: 4 additions & 6 deletions nnunetv2/batch_running/collect_results_custom_Decathlon.py
@@ -94,21 +94,19 @@ def summarize(input_file, output_file, folds: Tuple[int, ...], configs: Tuple[st

if __name__ == '__main__':
use_these_trainers = {
'nnUNetTrainer': ('nnUNetPlans',),
'nnUNetTrainerDiceCELoss_noSmooth': ('nnUNetPlans',),
'nnUNetTrainer_DASegOrd0': ('nnUNetPlans',),
'nnUNetTrainer': ('nnUNetPlans', 'nnUNetResEncUNetPlans', 'nnUNetResEncUNet2Plans', 'nnUNetResBottleneckEncUNetPlans', 'nnUNetResUNetPlans', 'nnUNetResUNet2Plans', 'nnUNetResUNet3Plans', 'nnUNetDeeperResBottleneckEncUNetPlans'),
}
all_results_file= join(nnUNet_results, 'customDecResults.csv')
datasets = [2, 3, 4, 17, 20, 24, 27, 38, 55, 64, 82]
datasets = [2, 3, 4, 17, 24, 27, 38, 55, 137, 217, 220, 221, 223] # amos post challenge, kits2023
collect_results(use_these_trainers, datasets, all_results_file)

folds = (0, 1, 2, 3, 4)
configs = ("3d_fullres", "3d_lowres")
configs = ("3d_fullres", )
output_file = join(nnUNet_results, 'customDecResults_summary5fold.csv')
summarize(all_results_file, output_file, folds, configs, datasets, use_these_trainers)

folds = (0, )
configs = ("3d_fullres", "3d_lowres")
configs = ("3d_fullres", )
output_file = join(nnUNet_results, 'customDecResults_summaryfold0.csv')
summarize(all_results_file, output_file, folds, configs, datasets, use_these_trainers)

42 changes: 20 additions & 22 deletions nnunetv2/batch_running/generate_lsf_runs_customDecathlon.py
@@ -21,18 +21,18 @@ def merge(dict1, dict2):
# after the Nature Methods paper we switch our evaluation to a different (more stable/high quality) set of
# datasets for evaluation and future development
configurations_all = {
2: ("3d_fullres", "2d"),
3: ("2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"),
4: ("2d", "3d_fullres"),
# 2: ("3d_fullres", "2d"),
# 3: ("2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"),
# 4: ("2d", "3d_fullres"),
17: ("2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"),
20: ("2d", "3d_fullres"),
24: ("2d", "3d_fullres"),
27: ("2d", "3d_fullres"),
38: ("2d", "3d_fullres"),
55: ("2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"),
64: ("2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"),
82: ("2d", "3d_fullres"),
# 83: ("2d", "3d_fullres"),
# 24: ("2d", "3d_fullres"),
# 27: ("2d", "3d_fullres"),
# 38: ("2d", "3d_fullres"),
# 55: ("2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"),
137: ("2d", "3d_fullres"),
220: ("2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"),
# 221: ("2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"),
223: ("2d", "3d_lowres", "3d_fullres", "3d_cascade_fullres"),
}

configurations_3d_fr_only = {
@@ -52,25 +52,23 @@ def merge(dict1, dict2):
}

num_gpus = 1
exclude_hosts = "-R \"select[hname!='e230-dgx2-2']\" -R \"select[hname!='e230-dgx2-1']\" -R \"select[hname!='e230-dgx1-1']\" -R \"select[hname!='e230-dgxa100-1']\" -R \"select[hname!='e230-dgxa100-2']\" -R \"select[hname!='e230-dgxa100-3']\" -R \"select[hname!='e230-dgxa100-4']\""
resources = "-R \"tensorcore\""
exclude_hosts = "-R \"select[hname!='e230-dgx2-2']\" -R \"select[hname!='e230-dgx2-1']\""
resources = ""
gpu_requirements = f"-gpu num={num_gpus}:j_exclusive=yes:gmem=33G"
queue = "-q gpu-lowprio"
preamble = "-L /bin/bash \"source ~/load_env_cluster4.sh && "
train_command = 'nnUNet_results=/dkfz/cluster/gpu/checkpoints/OE0441/isensee/nnUNet_results_remake_release nnUNetv2_train'
queue = "-q gpu"
preamble = "-L /bin/bash \"source ~/load_env_mamba_slumber.sh && "
train_command = 'nnUNetv2_train'

folds = (0, )
folds = (1, 2, 3, 4)
# use_this = configurations_2d_only
use_this = merge(configurations_3d_fr_only, configurations_3d_lr_only)
use_this = configurations_3d_fr_only
# use_this = merge(use_this, configurations_3d_c_only)

use_these_modules = {
'nnUNetTrainer': ('nnUNetPlans',),
'nnUNetTrainerDiceCELoss_noSmooth': ('nnUNetPlans',),
# 'nnUNetTrainer_DASegOrd0': ('nnUNetPlans',),
'nnUNetTrainer': ('nnUNetPlans', 'nnUNetResEncUNetMPlans', 'nnUNetResEncUNetLPlans', 'nnUNetResEncUNetXLPlans'),
}

additional_arguments = f'--disable_checkpointing -num_gpus {num_gpus}' # ''
additional_arguments = f' -num_gpus {num_gpus}' # ''

output_file = "/home/isensee/deleteme.txt"
with open(output_file, 'w') as f:
29 changes: 28 additions & 1 deletion nnunetv2/dataset_conversion/Dataset027_ACDC.py
@@ -1,9 +1,12 @@
import os
import shutil
from pathlib import Path
from typing import List

from batchgenerators.utilities.file_and_folder_operations import nifti_files, join, maybe_mkdir_p, save_json
from nnunetv2.dataset_conversion.generate_dataset_json import generate_dataset_json
from nnunetv2.paths import nnUNet_raw
from nnunetv2.paths import nnUNet_raw, nnUNet_preprocessed
import numpy as np


def make_out_dirs(dataset_id: int, task_name="ACDC"):
@@ -22,6 +25,22 @@ def make_out_dirs(dataset_id: int, task_name="ACDC"):
return out_dir, out_train_dir, out_labels_dir, out_test_dir


def create_ACDC_split(labelsTr_folder: str, seed: int = 1234) -> List[dict[str, List]]:
# labelsTr_folder = '/home/isensee/drives/gpu_data_root/OE0441/isensee/nnUNet_raw/nnUNet_raw_remake/Dataset027_ACDC/labelsTr'
nii_files = nifti_files(labelsTr_folder, join=False)
patients = np.unique([i[:len('patient000')] for i in nii_files])
rs = np.random.RandomState(seed)
rs.shuffle(patients)
splits = []
for fold in range(5):
val_patients = patients[fold::5]
train_patients = [i for i in patients if i not in val_patients]
val_cases = [i[:-7] for i in nii_files for j in val_patients if i.startswith(j)]
train_cases = [i[:-7] for i in nii_files for j in train_patients if i.startswith(j)]
splits.append({'train': train_cases, 'val': val_cases})
return splits


def copy_files(src_data_folder: Path, train_dir: Path, labels_dir: Path, test_dir: Path):
"""Copy files from the ACDC dataset to the nnUNet dataset folder. Returns the number of training cases."""
patients_train = sorted([f for f in (src_data_folder / "training").iterdir() if f.is_dir()])
@@ -84,4 +103,12 @@ def convert_acdc(src_data_folder: str, dataset_id=27):
args = parser.parse_args()
print("Converting...")
convert_acdc(args.input_folder, args.dataset_id)

dataset_name = f"Dataset{args.dataset_id:03d}_{'ACDC'}"
labelsTr = join(nnUNet_raw, dataset_name, 'labelsTr')
preprocessed_folder = join(nnUNet_preprocessed, dataset_name)
maybe_mkdir_p(preprocessed_folder)
split = create_ACDC_split(labelsTr)
save_json(split, join(preprocessed_folder, 'splits_final.json'), sort_keys=False)

print("Done!")
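
For context, the `splits_final.json` written above follows nnU-Net's usual custom-split format: a list with one entry per fold, each holding case identifiers (not file names) for 'train' and 'val'. A hedged illustration with placeholder identifiers:

```python
# structure produced by create_ACDC_split (identifiers are placeholders)
splits = [
    {
        'train': ['patient001_frame01', 'patient001_frame12'],  # all frames of the training patients
        'val': ['patient005_frame01', 'patient005_frame12'],    # all frames of the held-out patients
    },
    # ... four more folds; each patient appears in exactly one validation set
]
```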
2 changes: 1 addition & 1 deletion nnunetv2/dataset_conversion/Dataset220_KiTS2023.py
@@ -31,7 +31,7 @@ def convert_kits2023(kits_base_dir: str, nnunet_dataset_id: int = 220):
regions_class_order=(1, 3, 2),
num_training_cases=len(cases), file_ending='.nii.gz',
dataset_name=task_name, reference='none',
release='prerelease',
release='0.1.3',
overwrite_image_reader_writer='NibabelIOWithReorient',
description="KiTS2023")

59 changes: 59 additions & 0 deletions nnunetv2/dataset_conversion/Dataset223_AMOS2022postChallenge.py
@@ -0,0 +1,59 @@
import shutil

from batchgenerators.utilities.file_and_folder_operations import *
from nnunetv2.paths import nnUNet_raw
from nnunetv2.dataset_conversion.generate_dataset_json import generate_dataset_json

if __name__ == '__main__':
downloaded_amos_dir = '/home/isensee/amos22/amos22' # downloaded and extracted from https://zenodo.org/record/7155725#.Y0OOCOxBztM

target_dataset_id = 223
target_dataset_name = f'Dataset{target_dataset_id:3.0f}_AMOS2022postChallenge'

maybe_mkdir_p(join(nnUNet_raw, target_dataset_name))
imagesTr = join(nnUNet_raw, target_dataset_name, 'imagesTr')
imagesTs = join(nnUNet_raw, target_dataset_name, 'imagesTs')
labelsTr = join(nnUNet_raw, target_dataset_name, 'labelsTr')
maybe_mkdir_p(imagesTr)
maybe_mkdir_p(imagesTs)
maybe_mkdir_p(labelsTr)

train_identifiers = []
# copy images
source = join(downloaded_amos_dir, 'imagesTr')
source_files = nifti_files(source, join=False)
train_identifiers += source_files
for s in source_files:
shutil.copy(join(source, s), join(imagesTr, s[:-7] + '_0000.nii.gz'))

source = join(downloaded_amos_dir, 'imagesVa')
source_files = nifti_files(source, join=False)
train_identifiers += source_files
for s in source_files:
shutil.copy(join(source, s), join(imagesTr, s[:-7] + '_0000.nii.gz'))

source = join(downloaded_amos_dir, 'imagesTs')
source_files = nifti_files(source, join=False)
for s in source_files:
shutil.copy(join(source, s), join(imagesTs, s[:-7] + '_0000.nii.gz'))

# copy labels
source = join(downloaded_amos_dir, 'labelsTr')
source_files = nifti_files(source, join=False)
for s in source_files:
shutil.copy(join(source, s), join(labelsTr, s))

source = join(downloaded_amos_dir, 'labelsVa')
source_files = nifti_files(source, join=False)
for s in source_files:
shutil.copy(join(source, s), join(labelsTr, s))

old_dataset_json = load_json(join(downloaded_amos_dir, 'dataset.json'))
new_labels = {v: k for k, v in old_dataset_json['labels'].items()}

generate_dataset_json(join(nnUNet_raw, target_dataset_name), {0: 'nonCT'}, new_labels,
num_training_cases=len(train_identifiers), file_ending='.nii.gz', regions_class_order=None,
dataset_name=target_dataset_name, reference='https://zenodo.org/record/7155725#.Y0OOCOxBztM',
license=old_dataset_json['licence'], # typo in OG dataset.json
description=old_dataset_json['description'],
release=old_dataset_json['release'])
2 changes: 1 addition & 1 deletion nnunetv2/evaluation/evaluate_predictions.py
@@ -33,7 +33,7 @@ def key_to_label_or_region(key: str):

def save_summary_json(results: dict, output_file: str):
"""
stupid json does not support tuples as keys (why does it have to be so shitty) so we need to convert that shit
json does not support tuples as keys (why does it have to be so shitty) so we need to convert that shit
ourselves
"""
results_converted = deepcopy(results)