features extraction speedup
anna-grim committed Nov 13, 2023
1 parent 97d0a35 commit 5a116ad
Showing 9 changed files with 271 additions and 61 deletions.
12 changes: 12 additions & 0 deletions pyproject.toml
@@ -17,6 +17,18 @@ readme = "README.md"
dynamic = ["version"]

dependencies = [
+    'boto3',
+    'lightning',
+    'more_itertools',
+    'networkx',
+    'plotly',
+    'scikit-learn',
+    'scipy',
+    'tensorstore',
+    'torch',
+    'torcheval',
+    'torchio',
+    'zarr',
]

[project.optional-dependencies]
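The new dependency block can be sanity-checked after an editable install. A minimal sketch (an assumption, not part of the commit; note that scikit-learn imports as sklearn):

import importlib

# Confirm the newly added runtime dependencies resolve in this environment.
for module in [
    "boto3", "lightning", "more_itertools", "networkx", "plotly",
    "sklearn", "scipy", "tensorstore", "torch", "torcheval", "torchio", "zarr",
]:
    importlib.import_module(module)
print("all dependencies importable")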
2 changes: 1 addition & 1 deletion src/deep_neurographs/deep_learning/datasets.py
@@ -248,7 +248,7 @@ def __init__(self):
        self.transform = tio.Compose(
            [
                tio.RandomBlur(std=(0, 0.4)),
-                tio.RandomNoise(std=(0, 0.03)),
+                tio.RandomNoise(std=(0, 0.0125)),
                tio.RandomFlip(axes=(0, 1, 2)),
                # tio.RandomAffine(
                #     degrees=20, scales=(0.8, 1), image_interpolation="nearest"
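The only change here tightens the noise augmentation (std upper bound 0.03 to 0.0125), keeping augmented patches closer to the raw intensities. A minimal standalone sketch of the same torchio stack on a dummy patch (patch shape assumed):

import torch
import torchio as tio

transform = tio.Compose(
    [
        tio.RandomBlur(std=(0, 0.4)),
        tio.RandomNoise(std=(0, 0.0125)),
        tio.RandomFlip(axes=(0, 1, 2)),
    ]
)
patch = torch.rand(1, 64, 64, 64)  # (channels, x, y, z)
augmented = transform(patch)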
1 change: 1 addition & 0 deletions src/deep_neurographs/deep_learning/train.py
@@ -99,6 +99,7 @@ def train_network(
        train_set,
        num_workers=NUM_WORKERS,
        batch_size=BATCH_SIZE,
+        pin_memory=True,
        shuffle=SHUFFLE,
    )
    valid_loader = DataLoader(
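pin_memory=True makes the loader hand back batches in page-locked host memory, so host-to-GPU copies run faster and can be issued asynchronously. A toy sketch of the pattern, independent of this repo's datasets:

import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.rand(256, 8), torch.randint(0, 2, (256,)))
loader = DataLoader(dataset, batch_size=32, pin_memory=True, shuffle=True)
for x, y in loader:
    if torch.cuda.is_available():
        # with pinned memory, non_blocking copies can overlap with compute
        x = x.to("cuda", non_blocking=True)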
89 changes: 89 additions & 0 deletions src/deep_neurographs/evaluation.py
@@ -0,0 +1,89 @@
"""
Created on Sat July 15 9:00:00 2023
@author: Anna Grim
@email: [email protected]
Evaluates performance of edge classifier.
"""
from copy import deepcopy

import numpy as np


def run_evaluation(
target_graphs, pred_graphs, y_pred, block_to_idxs, idx_to_edge, blocks
):
stats = init_counters()
stats_by_type = {"simple": init_counters(), "complex": init_counters()}
for block_id in blocks:
# Get predicted edges
pred_edges = get_predictions(
block_to_idxs[block_id], idx_to_edge, y_pred
)

# Overall performance
num_fixes, num_mistakes = __reconstruction_stats(
target_graphs[block_id], pred_graphs[block_id], pred_edges
)
stats["# splits fixed"].append(num_fixes)
stats["# merges created"].append(num_mistakes)

# In-depth performance
simple_stats, complex_stats = __reconstruction_type_stats(
target_graphs[block_id], pred_graphs[block_id], pred_edges
)
for key in stats.keys():
stats_by_type["simple"][key].append(simple_stats[key])
stats_by_type["complex"][key].append(complex_stats[key])
return stats, stats_by_type


def init_counters(val=[]):
return {"# splits fixed": deepcopy(val), "# merges created": deepcopy(val)}


def get_predictions(idxs, idx_to_edge, y_pred):
edge_idxs = set(np.where(y_pred > 0)[0]).intersection(idxs)
return set([idx_to_edge[idx] for idx in edge_idxs])
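

# Toy illustration (not from the commit): positive entries of y_pred are
# intersected with a block's index set, then mapped back to edge tuples.
#   >>> import numpy as np
#   >>> y_pred = np.array([0, 1, 1, 0, 1])
#   >>> idx_to_edge = {i: ("edge", i) for i in range(5)}
#   >>> get_predictions({0, 1, 2}, idx_to_edge, y_pred)
#   {('edge', 1), ('edge', 2)}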


def __reconstruction_stats(target_graph, pred_graph, pred_edges):
    true_positives = 0
    false_positives = 0
    for edge in pred_edges:
        if edge in pred_graph.target_edges:
            true_positives += 1
        else:
            false_positives += 1
    return true_positives, false_positives


def __reconstruction_type_stats(target_graph, pred_graph, pred_edges):
    simple_stats = init_counters(val=0)
    complex_stats = init_counters(val=0)
    for edge in pred_edges:
        i, j = tuple(edge)
        deg_i = pred_graph.immutable_degree(i)
        deg_j = pred_graph.immutable_degree(j)
        if edge in pred_graph.target_edges:
            if deg_i == 1 and deg_j == 1:
                simple_stats["# splits fixed"] += 1
            else:
                complex_stats["# splits fixed"] += 1
        else:
            if deg_i == 1 and deg_j == 1:
                simple_stats["# merges created"] += 1
            else:
                complex_stats["# merges created"] += 1
    return simple_stats, complex_stats


def compute_accuracy(stats, type_key, num_edges):
    tp = deepcopy(stats[type_key]["# splits fixed"])
    fp = deepcopy(stats[type_key]["# merges created"])

    recall = tp / num_edges
    precision = tp / (tp + fp)
    f1 = (2 * recall * precision) / (recall + precision)
    return precision, recall, f1
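
# Worked example with toy counts (hypothetical): tp = 40 splits fixed,
# fp = 10 merges created, num_edges = 100 target edges:
#   recall    = 40 / 100       = 0.40
#   precision = 40 / (40 + 10) = 0.80
#   f1        = 2 * 0.40 * 0.80 / (0.40 + 0.80) ≈ 0.53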
84 changes: 60 additions & 24 deletions src/deep_neurographs/feature_extraction.py
@@ -4,7 +4,7 @@
@author: Anna Grim
@email: [email protected]
-Builds graph for postprocessing with GNN.
+Generates features.
"""

@@ -16,6 +16,7 @@
from deep_neurographs import geometry_utils, utils

CHUNK_SIZE = [64, 64, 64]
+BUFFER = 256
HALF_CHUNK_SIZE = [CHUNK_SIZE[i] // 2 for i in range(3)]
WINDOW_SIZE = [5, 5, 5]

@@ -67,51 +68,86 @@ def generate_mutable_features(
def generate_mutable_img_chunk_features(
    neurograph, img_path, labels_path, anisotropy=[1.0, 1.0, 1.0]
):
-    img = utils.open_zarr(img_path)
-    pred_labels = utils.open_tensorstore(labels_path)
    features = dict()
+    shape = neurograph.shape
+    origin = neurograph.bbox["min"]  # world coordinates
+    origin = utils.apply_anisotropy(
+        origin, anisotropy, return_int=True
+    )  # global image coordinates
+    img, labels = utils.get_superchunks(
+        img_path, labels_path, origin, shape, from_center=False
+    )
    for edge in neurograph.mutable_edges:
-        # Extract coordinates
-        edge_xyz = neurograph.edges[edge]["xyz"]
-        edge_xyz[0] = utils.apply_anisotropy(
-            edge_xyz[0], anisotropy=anisotropy
-        )
-        edge_xyz[1] = utils.apply_anisotropy(
-            edge_xyz[1], anisotropy=anisotropy
-        )
+        # Compute image coordinates
+        edge_xyz = deepcopy(neurograph.edges[edge]["xyz"])
+        edge_xyz = [
+            utils.apply_anisotropy(
+                edge_xyz[0] - origin, anisotropy=anisotropy
+            ),
+            utils.apply_anisotropy(
+                edge_xyz[1] - origin, anisotropy=anisotropy
+            ),
+        ]

-        # Read chunks
-        midpoint = geometry_utils.compute_midpoint(edge_xyz[0], edge_xyz[1])
-        origin = tuple(np.round(midpoint).astype(int))
-        img_chunk = utils.read_img_chunk(img, origin, CHUNK_SIZE)
-        labels_chunk = utils.read_tensorstore(pred_labels, origin, CHUNK_SIZE)
+        # Extract chunks
+        midpoint = geometry_utils.compute_midpoint(
+            edge_xyz[0], edge_xyz[1]
+        ).astype(int)
+        img_chunk = utils.get_chunk(img, midpoint, CHUNK_SIZE)
+        labels_chunk = utils.get_chunk(labels, midpoint, CHUNK_SIZE)

-        # Add path
-        d = geometry_utils.dist(edge_xyz[0], edge_xyz[1])
+        # Compute path
+        d = int(geometry_utils.dist(edge_xyz[0], edge_xyz[1]) + 5)
        img_coords_1 = np.round(
-            (edge_xyz[0] - midpoint + HALF_CHUNK_SIZE)
+            edge_xyz[0] - midpoint + HALF_CHUNK_SIZE
        ).astype(int)
        img_coords_2 = np.round(
            edge_xyz[1] - midpoint + HALF_CHUNK_SIZE
        ).astype(int)
-        path = geometry_utils.make_line(img_coords_1, img_coords_2, int(d + 5))
+        path = geometry_utils.make_line(img_coords_1, img_coords_2, d)

        # Fill path
        labels_chunk[labels_chunk > 0] = 1
        labels_chunk = geometry_utils.fill_path(labels_chunk, path, val=-1)
        features[edge] = np.stack([img_chunk, labels_chunk], axis=0)

    return features


+def get_chunk(superchunk, xyz):
+    return deepcopy(
+        superchunk[
+            (xyz[0] - CHUNK_SIZE[0] // 2) : xyz[0] + CHUNK_SIZE[0] // 2,
+            (xyz[1] - CHUNK_SIZE[1] // 2) : xyz[1] + CHUNK_SIZE[1] // 2,
+            (xyz[2] - CHUNK_SIZE[2] // 2) : xyz[2] + CHUNK_SIZE[2] // 2,
+        ]
+    )
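
The speedup in this file comes from replacing per-edge zarr/tensorstore reads with one bulk superchunk fetch followed by in-memory slicing, as in the helper above. A toy sketch of the access pattern (sizes hypothetical, a numpy array standing in for the fetched superchunk):

import numpy as np

superchunk = np.random.rand(512, 512, 512)  # one bulk read instead of one per edge

def crop(center, size=64):
    # per-edge chunks become cheap in-memory slices
    return superchunk[tuple(slice(c - size // 2, c + size // 2) for c in center)]

chunk = crop((100, 200, 300))
assert chunk.shape == (64, 64, 64)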


def generate_mutable_img_profile_features(
    neurograph, path, anisotropy=[1.0, 1.0, 1.0]
):
-    img = utils.open_zarr(path)
    features = dict()
+    origin = utils.apply_anisotropy(
+        neurograph.bbox["min"], anisotropy, return_int=True
+    )
+    shape = [neurograph.shape[i] + BUFFER for i in range(3)]
+    superchunk = utils.get_superchunk(
+        path, "zarr", origin, shape, from_center=False
+    )
    for edge in neurograph.mutable_edges:
-        xyz = neurograph.edges[edge]["xyz"]
-        line = geometry_utils.make_line(xyz[0], xyz[1], NUM_POINTS)
+        edge_xyz = deepcopy(neurograph.edges[edge]["xyz"])
+        edge_xyz = [
+            utils.apply_anisotropy(
+                edge_xyz[0] - neurograph.origin, anisotropy=anisotropy
+            ),
+            utils.apply_anisotropy(
+                edge_xyz[1] - neurograph.origin, anisotropy=anisotropy
+            ),
+        ]
+        line = geometry_utils.make_line(edge_xyz[0], edge_xyz[1], NUM_POINTS)
        features[edge] = geometry_utils.get_profile(
-            img, line, anisotropy=anisotropy, window_size=WINDOW_SIZE
+            superchunk, line, window_size=WINDOW_SIZE
        )
    return features

26 changes: 22 additions & 4 deletions src/deep_neurographs/geometry_utils.py
@@ -129,21 +129,37 @@ def smooth_end(branch_xyz, radii, ref_xyz, num_pts=8):


# Image feature extraction
-def get_profile(
-    img, xyz_arr, anisotropy=[1.0, 1.0, 1.0], window_size=[4, 4, 4]
-):
-    xyz_arr = get_coords(xyz_arr, anisotropy=anisotropy)
-    profile = []
-    for xyz in xyz_arr:
-        img_chunk = utils.read_img_chunk(img, xyz, window_size)
-        profile.append(np.max(img_chunk))
-    return np.array(profile)
+def get_profile(img, xyz_arr, window_size=[5, 5, 5]):
+    return [np.max(utils.get_chunk(img, xyz, window_size)) for xyz in xyz_arr]
+
+
+"""
+def get_profile_old(
+    img, xyz_arr, anisotropy=[1.0, 1.0, 1.0], window_size=[5, 5, 5]
+):
+    #xyz_arr = get_coords(xyz_arr, anisotropy=anisotropy)
+    profile = []
+    for xyz in xyz_arr:
+        xyz = xyz.astype(int)
+        img_chunk = utils.get_chunk(img, xyz, window_size)
+        profile.append(np.max(img_chunk))
+    return np.array(profile)
+"""


def fill_path(img, path, val=-1):
    for xyz in path:
        x, y, z = tuple(np.round(xyz).astype(int))
-        img[(x - 1) : x + 1, (y - 1) : y + 1, (z - 1) : z + 1] = val
+        img[x, y, z] = val
+        # img[(x - 1) : x + 1, (y - 1) : y + 1, (z - 1) : z + 1] = val
    return img


@@ -185,8 +201,10 @@ def dist(x, y, metric="l2"):


def make_line(xyz_1, xyz_2, num_steps):
+    xyz_1 = np.array(xyz_1)
+    xyz_2 = np.array(xyz_2)
    t_steps = np.linspace(0, 1, num_steps)
-    return np.array([(1 - t) * xyz_1 + t * xyz_2 for t in t_steps])
+    return np.array([(1 - t) * xyz_1 + t * xyz_2 for t in t_steps], dtype=int)
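
With dtype=int the interpolated points come back as voxel indices (values truncated toward zero), so the paths handed to fill_path are already integral. For example:

make_line((0, 0, 0), (4, 8, 12), 5)
# array([[ 0,  0,  0],
#        [ 1,  2,  3],
#        [ 2,  4,  6],
#        [ 3,  6,  9],
#        [ 4,  8, 12]])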


def normalize(x, norm="l2"):
7 changes: 4 additions & 3 deletions src/deep_neurographs/intake.py
@@ -52,9 +52,10 @@ def build_neurograph(
        prune=prune,
        prune_depth=prune_depth,
    )
-    neurograph.generate_mutables(
-        max_degree=max_mutable_degree, search_radius=search_radius
-    )
+    if search_radius > 0:
+        neurograph.generate_mutables(
+            max_degree=max_mutable_degree, search_radius=search_radius
+        )
    return neurograph


Expand Down
Loading

0 comments on commit 5a116ad

Please sign in to comment.