Merge pull request #38 from aecelaya/main

Refactor preprocessing to production-quality code.
mist-medical · Sep 20, 2024 · f55a825 · f55a825
2 parents 8b34d81 + 60f4e65
commit f55a825
Show file tree

Hide file tree

Showing 7 changed files with 1,145 additions and 497 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,42 +1,12 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:24.05-py3
-FROM ${FROM_IMAGE_NAME}
+FROM pytorch/pytorch:2.4.0-cuda12.4-cudnn9-runtime
 
-# Set environment variables 
-#ENV OMP_NUM_THREADS=2
-#ENV OMPI_MCA_coll_hcoll_enable 0
-#ENV HCOLL_ENABLE_MCAST 0
-ENV DEBIAN_FRONTEND noninteractive
+# Set environment variables for non-interactive installation.
+ENV DEBIAN_FRONTEND=noninteractive
 
-# Install mist
+# Install MIST.
 RUN pip install --upgrade pip \
     && pip install --upgrade --no-cache-dir mist-medical
 
-# Install dependencies
-#RUN apt-get update -y --fix-missing \
-#    && apt-get install -y cmake git
-#
-## Install ANTs
-#RUN mkdir /opt/ants \
-#    && cd /opt/ants \
-#    && git clone https://github.com/ANTsX/ANTs.git \
-#    && cd /opt/ants/ANTs \
-#    && git checkout v2.5.0 \
-#    && cd /opt/ants \
-#    && mkdir build install \
-#    && cd /opt/ants/build \
-#    && cmake -DCMAKE_INSTALL_PREFIX=/opt/ants/install ../ANTs 2>&1 | tee cmake.log \
-#    && make -j 4 2>&1 | tee build.log \
-#    && cd /opt/ants/build/ANTS-build \
-#    && make install 2>&1 | tee install.log \
-#
-## Install c3d
-#RUN mkdir /opt/c3d \
-#    && cd /opt/c3d/ \
-#    && wget https://downloads.sourceforge.net/project/c3d/c3d/Nightly/c3d-nightly-Linux-x86_64.tar.gz \
-#    && tar -xvf c3d-nightly-Linux-x86_64.tar.gz \
-#    && cp c3d-1.1.0-Linux-x86_64/bin/c3d /usr/local/bin/ \
-
-# Create working directory
-#ENV PATH /opt/ants/install/bin:$PATH
+# Create app directory.
 RUN mkdir /app
 WORKDIR /app
diff --git a/mist/analyze_data/analyze.py b/mist/analyze_data/analyze.py
@@ -75,15 +75,13 @@ def check_crop_fg(self):
                 patient = self.paths_dataframe.iloc[i].to_dict()
                 image_list = list(patient.values())[3:len(patient)]
 
-                # Read original images
+                # Read original images.
                 image = ants.image_read(image_list[0])
 
-                # Get foreground mask and save it to save computation time later
-                fg_bbox = utils.get_fg_mask_bbox(
-                    image, patient_id=patient["id"]
-                )
+                # Get foreground mask and save it to save computation time.
+                fg_bbox = utils.get_fg_mask_bbox(image)
 
-                # Get cropped dimensions from bounding box
+                # Get cropped dimensions from bounding box.
                 cropped_dims[i, :] = [
                     fg_bbox["x_end"] - fg_bbox["x_start"] + 1,
                     fg_bbox["y_end"] - fg_bbox["y_start"] + 1,
@@ -94,7 +92,8 @@ def check_crop_fg(self):
                     1. - (np.prod(cropped_dims[i, :]) / np.prod(image.shape))
                 )
 
-                # Update bounding box dataframe to save for later
+                # Update bounding box dataframe with foreground bounding box.
+                fg_bbox["id"] = patient["id"]
                 bbox_df = pd.concat(
                     [bbox_df, pd.DataFrame(fg_bbox, index=[0])],
                     ignore_index=True
@@ -152,10 +151,10 @@ def check_nz_ratio(self):
                 patient = self.paths_dataframe.iloc[i].to_dict()
                 image_list = list(patient.values())[3:len(patient)]
 
-                # Read original images
+                # Read original images.
                 image = ants.image_read(image_list[0])
 
-                # Get nonzero ratio
+                # Get nonzero ratio.
                 nz_ratio.append(
                     np.sum(image.numpy() != 0) / np.prod(image.shape)
                 )
@@ -170,7 +169,7 @@ def get_target_spacing(self):
         """Get target spacing for preprocessing."""
         progress = utils.get_progress_bar("Getting target spacing")
 
-        # If data is anisotropic, get median image spacing
+        # If data is anisotropic, get median image spacing.
         original_spacings = np.zeros((len(self.paths_dataframe), 3))
 
         with progress as pb:
@@ -180,7 +179,7 @@ def get_target_spacing(self):
                 # Read mask image. This is faster to load.
                 spacing = ants.image_header_info(patient["mask"])["spacing"]
 
-                # Get spacing
+                # Get voxel spacing.
                 original_spacings[i, :] = spacing
 
         # Initialize target spacing
@@ -276,7 +275,7 @@ def check_resampled_dims(self, cropped_dims):
                 resampled_dims[i, :] = new_dims
 
         if len(messages) > 0:
-            text = rich.text.Text(messages)
+            text = rich.text.Text(messages) # type: ignore
             console.print(text)
 
         median_resampled_dims = list(np.median(resampled_dims, axis=0))
@@ -301,7 +300,7 @@ def get_ct_normalization_parameters(self):
                 # You don't need to use all of the voxels for this.
                 fg_intensities += (
                     image[mask != 0]
-                ).tolist()[::analyzer_constants.AnalyzeConstants.CT_GATHER_EVERY_ITH_VOXEL_VALUE]
+                ).tolist()[::analyzer_constants.AnalyzeConstants.CT_GATHER_EVERY_ITH_VOXEL_VALUE] # type: ignore
 
         global_z_score_mean = np.mean(fg_intensities)
         global_z_score_std = np.std(fg_intensities)
@@ -349,7 +348,7 @@ def analyze_dataset(self):
         target_spacing = self.get_target_spacing()
 
         if self.dataset_information["modality"] == "ct":
-            # Get CT normalization parameters
+            # Get CT normalization parameters.
             ct_normalization_parameters = (
                 self.get_ct_normalization_parameters()
             )
@@ -490,7 +489,7 @@ def validate_dataset(self):
         # If there are any bad examples, print their ids.
         if len(messages) > 0:
             messages += "Excluding these from training\n"
-            text = rich.text.Text(messages)
+            text = rich.text.Text(messages) # type: ignore
             console.print(text)
 
         # If all of the data is bad, then raise an error.
@@ -504,7 +503,7 @@ def validate_dataset(self):
 
     def run(self):
         """Run the analyzer to get configuration file."""
-        text = rich.text.Text("\nAnalyzing dataset\n")
+        text = rich.text.Text("\nAnalyzing dataset\n") # type: ignore
         text.stylize("bold")
         console.print(text)
 

diff --git a/mist/evaluate_preds/evaluate.py b/mist/evaluate_preds/evaluate.py
@@ -1,11 +1,11 @@
+"""Evaluate predictions against ground truth masks."""
 import os
 import json
 import ants
 import pandas as pd
 import numpy as np
-import SimpleITK as sitk
 
-# Rich progres bar
+# Rich progress bar
 from rich.console import Console
 from rich.text import Text
 
@@ -21,7 +21,8 @@
     init_results_df,
     compute_results_stats,
     convert_dict_to_df,
-    get_progress_bar
+    get_progress_bar,
+    listdir_with_no_hidden_files
 )
 
 
@@ -126,7 +127,7 @@ def evaluate(config_json, paths, source_dir, output_csv, metrics, use_native_spa
     results_df = init_results_df(config, metrics)
 
     # Get predictions from source directory
-    predictions = os.listdir(source_dir)
+    predictions = listdir_with_no_hidden_files(source_dir)
 
     # Set up rich progress bar and error logging
     eval_progress = get_progress_bar("Evaluating")

diff --git a/mist/inference/main_inference.py b/mist/inference/main_inference.py
@@ -337,7 +337,7 @@ def test_time_inference(df,
                         no_preprocess=False,
                         output_std=False):
     config = read_json_file(config_file)
-    
+
     create_empty_dir(dest)
 
     # Set up rich progress bar
@@ -367,18 +367,15 @@ def test_time_inference(df,
                 og_ants_img = ants.image_read(image_list[0])
 
                 if no_preprocess:
-                    torch_img, _, fg_bbox, _ = convert_nifti_to_numpy(image_list, None)
+                    preprocessed_example = convert_nifti_to_numpy(image_list)
                 else:
-                    torch_img, _, fg_bbox, _ = preprocess_example(
-                        config, 
-                        image_list, 
-                        None, 
-                        False, 
-                        False, 
-                        None
+                    preprocessed_example = preprocess_example(
+                        config,
+                        image_list,
                     )
 
                 # Make image channels first and add batch dimension
+                torch_img = preprocessed_example["image"]
                 torch_img = np.transpose(torch_img, axes=(3, 0, 1, 2))
                 torch_img = np.expand_dims(torch_img, axis=0)
 
@@ -394,7 +391,7 @@ def test_time_inference(df,
                     blend_mode,
                     tta,
                     output_std,
-                    fg_bbox
+                    preprocessed_example["fg_bbox"]
                 )
 
                 # Apply postprocessing if required