Add normalized_instance_similarity method (#1939)

* Add normalize function * Expose normalization function * Fix tests * Expose object keypoint sim function * Fix tests
talmolab · Sep 18, 2024 · 3c7f5af · 3c7f5af
1 parent e4bb444
commit 3c7f5af
Show file tree

Hide file tree

Showing 9 changed files with 59 additions and 15 deletions.
diff --git a/docs/guides/cli.md b/docs/guides/cli.md
@@ -207,7 +207,7 @@ optional arguments:
   --tracking.clean_iou_threshold TRACKING.CLEAN_IOU_THRESHOLD
                         IOU to use when culling instances *after* tracking. (default: 0)
   --tracking.similarity TRACKING.SIMILARITY
-                        Options: instance, centroid, iou (default: instance)
+                        Options: instance, normalized_instance, object_keypoint, centroid, iou (default: instance)
   --tracking.match TRACKING.MATCH
                         Options: hungarian, greedy (default: greedy)
   --tracking.robust TRACKING.ROBUST

diff --git a/docs/guides/proofreading.md b/docs/guides/proofreading.md
@@ -50,6 +50,8 @@ There are currently three methods for matching instances in frame N against thes
 - “**centroid**” measures similarity by the distance between the instance centroids
 - “**iou**” measures similarity by the intersection/overlap of the instance bounding boxes
 - “**instance**” measures similarity by looking at the distances between corresponding nodes in the instances, normalized by the number of valid nodes in the candidate instance.
+- “**normalized_instance**” measures similarity by looking at the distances between corresponding nodes in the instances after the keypoint coordinates have been normalized by the image size, with the result normalized by the number of valid nodes in the candidate instance.
+- “**object_keypoint**” measures similarity by computing the distance `d` between each pair of corresponding keypoints in a reference instance and a query instance, taking `exp(-d**2)` for each pair, summing over all keypoints, and dividing by the number of visible keypoints in the reference instance.
 
 Once SLEAP has measured the similarity between all the candidates and the instances in frame N, you need to choose a way to pair them up. You can do this either by picking the best match, and then picking the best remaining match for each remaining instance in turn—this is “**greedy**” matching—or you can find the way of matching identities which minimizes the total cost (or: maximizes the total similarity)—this is “**Hungarian**” matching.
 

diff --git a/sleap/config/pipeline_form.yaml b/sleap/config/pipeline_form.yaml
@@ -439,7 +439,7 @@ inference:
       label: Similarity Method
       type: list
       default: instance
-      options: "instance,centroid,iou,object keypoint"
+      options: "instance,normalized_instance,centroid,iou,object keypoint"
     - name: tracking.match
       label: Matching Method
       type: list
@@ -538,7 +538,7 @@ inference:
       label: Similarity Method
       type: list
       default: instance
-      options: "instance,centroid,iou,object keypoint"
+      options: "instance,normalized_instance,centroid,iou,object keypoint"
     - name: tracking.match
       label: Matching Method
       type: list

diff --git a/sleap/nn/inference.py b/sleap/nn/inference.py
@@ -2622,6 +2622,7 @@ def _object_builder():
                         # Set tracks for predicted instances in this frame.
                         predicted_instances = self.tracker.track(
                             untracked_instances=predicted_instances,
+                            img_hw=ex["image"].shape[-3:-1],
                             img=image,
                             t=frame_ind,
                         )
@@ -3264,6 +3265,7 @@ def _object_builder():
                         # Set tracks for predicted instances in this frame.
                         predicted_instances = self.tracker.track(
                             untracked_instances=predicted_instances,
+                            img_hw=ex["image"].shape[-3:-1],
                             img=image,
                             t=frame_ind,
                         )

diff --git a/sleap/nn/tracker/components.py b/sleap/nn/tracker/components.py
@@ -12,6 +12,7 @@
 
 
 """
+
 import operator
 from collections import defaultdict
 import logging
@@ -29,6 +30,21 @@
 InstanceType = TypeVar("InstanceType", Instance, PredictedInstance)
 
 
+def normalized_instance_similarity(
+    ref_instance: InstanceType, query_instance: InstanceType, img_hw: Tuple[int, int]
+) -> float:
+    """Computes similarity between instances with normalized keypoints.
+
+    The keypoint coordinates of both instances are divided by the image size. Each
+    node then contributes `exp(-d**2)`, where `d` is the distance between the
+    normalized reference and query keypoints. Nodes whose distance is NaN are
+    ignored in the sum.
+
+    Args:
+        ref_instance: Reference instance; its `points_array` is read.
+        query_instance: Query instance; its `points_array` is read.
+        img_hw: (height, width) of the image used to normalize the keypoints.
+
+    Returns:
+        The summed per-node similarity divided by the number of reference nodes
+        that have no NaN coordinate, or 0.0 if the reference has no such nodes.
+    """
+
+    # Reorder (height, width) into (width, height) before dividing.
+    # NOTE(review): assumes `points_array` columns are (x, y) -- confirm.
+    normalize_factors = np.array((img_hw[1], img_hw[0]))
+    ref_visible = ~(np.isnan(ref_instance.points_array).any(axis=1))
+    n_visible = np.sum(ref_visible)
+    if n_visible == 0:
+        # Nothing to compare against: avoid 0 / 0, which would yield NaN and a
+        # RuntimeWarning instead of a usable similarity score.
+        return 0.0
+
+    normalized_query_keypoints = query_instance.points_array / normalize_factors
+    normalized_ref_keypoints = ref_instance.points_array / normalize_factors
+    dists = np.sum((normalized_query_keypoints - normalized_ref_keypoints) ** 2, axis=1)
+    similarity = np.nansum(np.exp(-dists)) / n_visible
+
+    return similarity
+
+
 def instance_similarity(
     ref_instance: InstanceType, query_instance: InstanceType
 ) -> float:

diff --git a/sleap/nn/tracking.py b/sleap/nn/tracking.py
@@ -5,13 +5,15 @@
 import attr
 import numpy as np
 import cv2
+import functools
 from typing import Callable, Deque, Dict, Iterable, List, Optional, Tuple
 
 from sleap import Track, LabeledFrame, Skeleton
 
 from sleap.nn.tracker.components import (
     factory_object_keypoint_similarity,
     instance_similarity,
+    normalized_instance_similarity,
     centroid_distance,
     instance_iou,
     hungarian_matching,
@@ -495,7 +497,8 @@ def get_candidates(
     instance=instance_similarity,
     centroid=centroid_distance,
     iou=instance_iou,
-    object_keypoint=instance_similarity,
+    normalized_instance=normalized_instance_similarity,
+    object_keypoint=factory_object_keypoint_similarity,
 )
 
 match_policies = dict(
@@ -639,19 +642,26 @@ def uses_image(self):
     def track(
         self,
         untracked_instances: List[InstanceType],
+        img_hw: Tuple[int],
         img: Optional[np.ndarray] = None,
         t: int = None,
     ) -> List[InstanceType]:
         """Performs a single step of tracking.
 
         Args:
             untracked_instances: List of instances to assign to tracks.
+            img_hw: (height, width) of the image used to normalize the keypoints.
             img: Image data of the current frame for flow shifting.
             t: Current timestep. If not provided, increments from the internal queue.
 
         Returns:
             A list of the instances that were tracked.
         """
+        if self.similarity_function == normalized_instance_similarity:
+            factory_normalized_instance = functools.partial(
+                normalized_instance_similarity, img_hw=img_hw
+            )
+            self.similarity_function = factory_normalized_instance
 
         if self.candidate_maker is None:
             return untracked_instances
@@ -1520,6 +1530,7 @@ def run_tracker(frames: List[LabeledFrame], tracker: BaseTracker) -> List[Labele
             track_args["img"] = lf.video[lf.frame_idx]
         else:
             track_args["img"] = None
+        track_args["img_hw"] = lf.image.shape[-3:-1]
 
         new_lf = LabeledFrame(
             frame_idx=lf.frame_idx,

diff --git a/tests/nn/test_inference.py b/tests/nn/test_inference.py
@@ -1932,7 +1932,11 @@ def test_flow_tracker(centered_pair_predictions_sorted: Labels, tmpdir):
         for inst in lf.instances:
             inst.track = None
 
-        track_args = dict(untracked_instances=lf.instances, img=lf.video[lf.frame_idx])
+        track_args = dict(
+            untracked_instances=lf.instances,
+            img=lf.video[lf.frame_idx],
+            img_hw=lf.image.shape[-3:-1],
+        )
         tracker.track(**track_args)
 
         # Check that saved instances are pruned to track window
@@ -1975,7 +1979,11 @@ def test_max_tracks_matching_queue(
         for inst in lf.instances:
             inst.track = None
 
-        track_args = dict(untracked_instances=lf.instances, img=lf.video[lf.frame_idx])
+        track_args = dict(
+            untracked_instances=lf.instances,
+            img=lf.video[lf.frame_idx],
+            img_hw=lf.image.shape[-3:-1],
+        )
         tracker.track(**track_args)
 
         if trackername == "flowmaxtracks":

diff --git a/tests/nn/test_tracker_components.py b/tests/nn/test_tracker_components.py
@@ -30,14 +30,17 @@ def tracker_by_name(frames=None, **kwargs):
             inst.track = None
 
         track_args = dict(untracked_instances=lf.instances, img=lf.video[lf.frame_idx])
-        t.track(**track_args)
+        t.track(**track_args, img_hw=(1, 1))
         t.final_pass(frames)
 
 
 @pytest.mark.parametrize(
     "tracker", ["simple", "flow", "simplemaxtracks", "flowmaxtracks"]
 )
-@pytest.mark.parametrize("similarity", ["instance", "iou", "centroid"])
+@pytest.mark.parametrize(
+    "similarity",
+    ["instance", "normalized_instance", "iou", "centroid", "object_keypoint"],
+)
 @pytest.mark.parametrize("match", ["greedy", "hungarian"])
 @pytest.mark.parametrize("count", [0, 2])
 def test_tracker_by_name(
@@ -288,7 +291,7 @@ def test_max_tracking_large_gap_single_track():
 
     tracked = []
     for insts in preds:
-        tracked_insts = tracker.track(insts)
+        tracked_insts = tracker.track(insts, img_hw=(1, 1))
         tracked.append(tracked_insts)
     all_tracks = list(set([inst.track for frame in tracked for inst in frame]))
 
@@ -305,7 +308,7 @@ def test_max_tracking_large_gap_single_track():
 
     tracked = []
     for insts in preds:
-        tracked_insts = tracker.track(insts)
+        tracked_insts = tracker.track(insts, img_hw=(1, 1))
         tracked.append(tracked_insts)
     all_tracks = list(set([inst.track for frame in tracked for inst in frame]))
 
@@ -352,7 +355,7 @@ def test_max_tracking_small_gap_on_both_tracks():
 
     tracked = []
     for insts in preds:
-        tracked_insts = tracker.track(insts)
+        tracked_insts = tracker.track(insts, img_hw=(1, 1))
         tracked.append(tracked_insts)
     all_tracks = list(set([inst.track for frame in tracked for inst in frame]))
 
@@ -369,7 +372,7 @@ def test_max_tracking_small_gap_on_both_tracks():
 
     tracked = []
     for insts in preds:
-        tracked_insts = tracker.track(insts)
+        tracked_insts = tracker.track(insts, img_hw=(1, 1))
         tracked.append(tracked_insts)
     all_tracks = list(set([inst.track for frame in tracked for inst in frame]))
 
@@ -421,7 +424,7 @@ def test_max_tracking_extra_detections():
 
     tracked = []
     for insts in preds:
-        tracked_insts = tracker.track(insts)
+        tracked_insts = tracker.track(insts, img_hw=(1, 1))
         tracked.append(tracked_insts)
     all_tracks = list(set([inst.track for frame in tracked for inst in frame]))
 
@@ -438,7 +441,7 @@ def test_max_tracking_extra_detections():
 
     tracked = []
     for insts in preds:
-        tracked_insts = tracker.track(insts)
+        tracked_insts = tracker.track(insts, img_hw=(1, 1))
         tracked.append(tracked_insts)
     all_tracks = list(set([inst.track for frame in tracked for inst in frame]))
 

diff --git a/tests/nn/test_tracking_integration.py b/tests/nn/test_tracking_integration.py
@@ -102,7 +102,7 @@ def run_tracker(frames, tracker):
         new_lf = LabeledFrame(
             frame_idx=lf.frame_idx,
             video=lf.video,
-            instances=tracker.track(**track_args),
+            instances=tracker.track(**track_args, img_hw=lf.image.shape[-3:-1]),
         )
         new_lfs.append(new_lf)
 
@@ -138,6 +138,8 @@ def main(f, dir):
         instance=sleap.nn.tracker.components.instance_similarity,
         centroid=sleap.nn.tracker.components.centroid_distance,
         iou=sleap.nn.tracker.components.instance_iou,
+        normalized_instance=sleap.nn.tracker.components.normalized_instance_similarity,
+        object_keypoint=sleap.nn.tracker.components.factory_object_keypoint_similarity(),
     )
     scales = (
         1,